[SystemZ] Support i128 as legal type in VRs (#74625)

On processors supporting vector registers and SIMD instructions, enable i128 as legal type in VRs. This allows many operations to be implemented via native instructions directly in VRs (including add, subtract, logical operations and shifts). For a few other operations (e.g. multiply and divide, as well as atomic operations), we need to move the i128 value back to a GPR pair to use the corresponding instruction there. Overall, this is still beneficial. The patch includes the following LLVM changes: - Enable i128 as legal type - Set up legal operations (in SystemZInstrVector.td) - Custom expansion for i128 add/subtract with carry - Custom expansion for i128 comparisons and selects - Support for moving i128 to/from GPR pairs when required - Handle 128-bit integer constant values everywhere - Use i128 as intrinsic operand type where appropriate - Updated and new test cases In addition, clang builtins are updated to reflect the intrinsic operand type changes (which also improves compatibility with GCC).
llvm · Dec 15, 2023 · a65ccc1 · a65ccc1
1 parent 77c40ea
commit a65ccc1
Show file tree

Hide file tree

Showing 72 changed files with 4,904 additions and 835 deletions.
diff --git a/clang/include/clang/Basic/BuiltinsSystemZ.def b/clang/include/clang/Basic/BuiltinsSystemZ.def
@@ -64,14 +64,14 @@ TARGET_BUILTIN(__builtin_s390_vupllh, "V4UiV8Us", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vupllf, "V2ULLiV4Ui", "nc", "vector")
 
 // Vector integer instructions (chapter 22 of the PoP)
-TARGET_BUILTIN(__builtin_s390_vaq, "V16UcV16UcV16Uc", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vacq, "V16UcV16UcV16UcV16Uc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vaq, "SLLLiSLLLiSLLLi", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vacq, "ULLLiULLLiULLLiULLLi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vaccb, "V16UcV16UcV16Uc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vacch, "V8UsV8UsV8Us", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vaccf, "V4UiV4UiV4Ui", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vaccg, "V2ULLiV2ULLiV2ULLi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vaccq, "V16UcV16UcV16Uc", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vacccq, "V16UcV16UcV16UcV16Uc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vaccq, "ULLLiULLLiULLLi", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vacccq, "ULLLiULLLiULLLiULLLi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vavgb, "V16ScV16ScV16Sc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vavgh, "V8SsV8SsV8Ss", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vavgf, "V4SiV4SiV4Si", "nc", "vector")
@@ -116,11 +116,11 @@ TARGET_BUILTIN(__builtin_s390_verllvg, "V2ULLiV2ULLiV2ULLi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vgfmb, "V8UsV16UcV16Uc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vgfmh, "V4UiV8UsV8Us", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vgfmf, "V2ULLiV4UiV4Ui", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vgfmg, "V16UcV2ULLiV2ULLi", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vgfmg, "ULLLiV2ULLiV2ULLi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vgfmab, "V8UsV16UcV16UcV8Us", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vgfmah, "V4UiV8UsV8UsV4Ui", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vgfmaf, "V2ULLiV4UiV4UiV2ULLi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vgfmag, "V16UcV2ULLiV2ULLiV16Uc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vgfmag, "ULLLiV2ULLiV2ULLiULLLi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vmahb, "V16ScV16ScV16ScV16Sc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vmahh, "V8SsV8SsV8SsV8Ss", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vmahf, "V4SiV4SiV4SiV4Si", "nc", "vector")
@@ -161,14 +161,14 @@ TARGET_BUILTIN(__builtin_s390_vpopctb, "V16UcV16Uc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vpopcth, "V8UsV8Us", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vpopctf, "V4UiV4Ui", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vpopctg, "V2ULLiV2ULLi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vsq, "V16UcV16UcV16Uc", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vsbcbiq, "V16UcV16UcV16UcV16Uc", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vsbiq, "V16UcV16UcV16UcV16Uc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vsq, "SLLLiSLLLiSLLLi", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vsbcbiq, "ULLLiULLLiULLLiULLLi", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vsbiq, "ULLLiULLLiULLLiULLLi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vscbib, "V16UcV16UcV16Uc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vscbih, "V8UsV8UsV8Us", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vscbif, "V4UiV4UiV4Ui", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vscbig, "V2ULLiV2ULLiV2ULLi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vscbiq, "V16UcV16UcV16Uc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vscbiq, "ULLLiULLLiULLLi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vsl, "V16UcV16UcV16Uc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vslb, "V16UcV16UcV16Uc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vsldb, "V16UcV16UcV16UcIi", "nc", "vector")
@@ -180,8 +180,8 @@ TARGET_BUILTIN(__builtin_s390_vsumb, "V4UiV16UcV16Uc", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vsumh, "V4UiV8UsV8Us", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vsumgh, "V2ULLiV8UsV8Us", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vsumgf, "V2ULLiV4UiV4Ui", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vsumqf, "V16UcV4UiV4Ui", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vsumqg, "V16UcV2ULLiV2ULLi", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vsumqf, "ULLLiV4UiV4Ui", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vsumqg, "ULLLiV2ULLiV2ULLi", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vtm, "iV16UcV16Uc", "nc", "vector")
 
 // Vector string instructions (chapter 23 of the PoP)
@@ -256,7 +256,7 @@ TARGET_BUILTIN(__builtin_s390_vftcidb, "V2SLLiV2dIii*", "nc", "vector")
 TARGET_BUILTIN(__builtin_s390_vlrlr, "V16ScUivC*", "", "vector-enhancements-1")
 TARGET_BUILTIN(__builtin_s390_vstrlr, "vV16ScUiv*", "", "vector-enhancements-1")
 TARGET_BUILTIN(__builtin_s390_vbperm, "V2ULLiV16UcV16Uc", "nc", "vector-enhancements-1")
-TARGET_BUILTIN(__builtin_s390_vmslg, "V16UcV2ULLiV2ULLiV16UcIi", "nc", "vector-enhancements-1")
+TARGET_BUILTIN(__builtin_s390_vmslg, "ULLLiV2ULLiV2ULLiULLLiIi", "nc", "vector-enhancements-1")
 TARGET_BUILTIN(__builtin_s390_vfmaxdb, "V2dV2dV2dIi", "nc", "vector-enhancements-1")
 TARGET_BUILTIN(__builtin_s390_vfmindb, "V2dV2dV2dIi", "nc", "vector-enhancements-1")
 TARGET_BUILTIN(__builtin_s390_vfnmadb, "V2dV2dV2dV2d", "nc", "vector-enhancements-1")

diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h
@@ -8359,7 +8359,7 @@ vec_min(__vector double __a, __vector double __b) {
 
 static inline __ATTRS_ai __vector unsigned char
 vec_add_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return __builtin_s390_vaq(__a, __b);
+  return (__vector unsigned char)((__int128)__a + (__int128)__b);
 }
 
 /*-- vec_addc ---------------------------------------------------------------*/
@@ -8388,23 +8388,28 @@ vec_addc(__vector unsigned long long __a, __vector unsigned long long __b) {
 
 static inline __ATTRS_ai __vector unsigned char
 vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return __builtin_s390_vaccq(__a, __b);
+  return (__vector unsigned char)
+         __builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b);
 }
 
 /*-- vec_adde_u128 ----------------------------------------------------------*/
 
 static inline __ATTRS_ai __vector unsigned char
 vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b,
               __vector unsigned char __c) {
-  return __builtin_s390_vacq(__a, __b, __c);
+  return (__vector unsigned char)
+         __builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b,
+                             (unsigned __int128)__c);
 }
 
 /*-- vec_addec_u128 ---------------------------------------------------------*/
 
 static inline __ATTRS_ai __vector unsigned char
 vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b,
                __vector unsigned char __c) {
-  return __builtin_s390_vacccq(__a, __b, __c);
+  return (__vector unsigned char)
+         __builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b,
+                               (unsigned __int128)__c);
 }
 
 /*-- vec_avg ----------------------------------------------------------------*/
@@ -8478,7 +8483,7 @@ vec_gfmsum(__vector unsigned int __a, __vector unsigned int __b) {
 static inline __ATTRS_o_ai __vector unsigned char
 vec_gfmsum_128(__vector unsigned long long __a,
                __vector unsigned long long __b) {
-  return __builtin_s390_vgfmg(__a, __b);
+  return (__vector unsigned char)__builtin_s390_vgfmg(__a, __b);
 }
 
 /*-- vec_gfmsum_accum -------------------------------------------------------*/
@@ -8507,7 +8512,8 @@ static inline __ATTRS_o_ai __vector unsigned char
 vec_gfmsum_accum_128(__vector unsigned long long __a,
                      __vector unsigned long long __b,
                      __vector unsigned char __c) {
-  return __builtin_s390_vgfmag(__a, __b, __c);
+  return (__vector unsigned char)
+         __builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c);
 }
 
 /*-- vec_mladd --------------------------------------------------------------*/
@@ -8797,15 +8803,21 @@ vec_mulo(__vector unsigned int __a, __vector unsigned int __b) {
 /*-- vec_msum_u128 ----------------------------------------------------------*/
 
 #if __ARCH__ >= 12
+extern __ATTRS_o __vector unsigned char
+vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b,
+              __vector unsigned char __c, int __d)
+  __constant_range(__d, 0, 15);
+
 #define vec_msum_u128(X, Y, Z, W) \
-  ((__vector unsigned char)__builtin_s390_vmslg((X), (Y), (Z), (W)));
+  ((__typeof__((vec_msum_u128)((X), (Y), (Z), (W)))) \
+   __builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W)))
 #endif
 
 /*-- vec_sub_u128 -----------------------------------------------------------*/
 
 static inline __ATTRS_ai __vector unsigned char
 vec_sub_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return __builtin_s390_vsq(__a, __b);
+  return (__vector unsigned char)((__int128)__a - (__int128)__b);
 }
 
 /*-- vec_subc ---------------------------------------------------------------*/
@@ -8834,23 +8846,28 @@ vec_subc(__vector unsigned long long __a, __vector unsigned long long __b) {
 
 static inline __ATTRS_ai __vector unsigned char
 vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return __builtin_s390_vscbiq(__a, __b);
+  return (__vector unsigned char)
+         __builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b);
 }
 
 /*-- vec_sube_u128 ----------------------------------------------------------*/
 
 static inline __ATTRS_ai __vector unsigned char
 vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b,
               __vector unsigned char __c) {
-  return __builtin_s390_vsbiq(__a, __b, __c);
+  return (__vector unsigned char)
+         __builtin_s390_vsbiq((unsigned __int128)__a, (unsigned __int128)__b,
+                              (unsigned __int128)__c);
 }
 
 /*-- vec_subec_u128 ---------------------------------------------------------*/
 
 static inline __ATTRS_ai __vector unsigned char
 vec_subec_u128(__vector unsigned char __a, __vector unsigned char __b,
                __vector unsigned char __c) {
-  return __builtin_s390_vsbcbiq(__a, __b, __c);
+  return (__vector unsigned char)
+         __builtin_s390_vsbcbiq((unsigned __int128)__a, (unsigned __int128)__b,
+                                (unsigned __int128)__c);
 }
 
 /*-- vec_sum2 ---------------------------------------------------------------*/
@@ -8869,12 +8886,12 @@ vec_sum2(__vector unsigned int __a, __vector unsigned int __b) {
 
 static inline __ATTRS_o_ai __vector unsigned char
 vec_sum_u128(__vector unsigned int __a, __vector unsigned int __b) {
-  return __builtin_s390_vsumqf(__a, __b);
+  return (__vector unsigned char)__builtin_s390_vsumqf(__a, __b);
 }
 
 static inline __ATTRS_o_ai __vector unsigned char
 vec_sum_u128(__vector unsigned long long __a, __vector unsigned long long __b) {
-  return __builtin_s390_vsumqg(__a, __b);
+  return (__vector unsigned char)__builtin_s390_vsumqg(__a, __b);
 }
 
 /*-- vec_sum4 ---------------------------------------------------------------*/

diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-error2.c b/clang/test/CodeGen/SystemZ/builtins-systemz-error2.c
@@ -1,11 +1,8 @@
 // REQUIRES: systemz-registered-target
 // RUN: %clang_cc1 -triple s390x-ibm-linux -S -emit-llvm %s -verify -o -
 
-typedef __attribute__((vector_size(16))) char v16i8;
-
-v16i8 f0(v16i8 a, v16i8 b) {
-  __builtin_tbegin ((void *)0);         // expected-error {{'__builtin_tbegin' needs target feature transactional-execution}}
-  v16i8 tmp = __builtin_s390_vaq(a, b); // expected-error {{'__builtin_s390_vaq' needs target feature vector}}
-  return tmp;
+__int128 f0(__int128 a, __int128 b) {
+  __builtin_tbegin ((void *)0);    // expected-error {{'__builtin_tbegin' needs target feature transactional-execution}}
+  return __builtin_s390_vaq(a, b); // expected-error {{'__builtin_s390_vaq' needs target feature vector}}
 }
 
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
@@ -21,6 +21,8 @@ volatile vec_ushort vus;
 volatile vec_uint vui;
 volatile vec_ulong vul;
 volatile vec_double vd;
+volatile signed __int128 si128;
+volatile unsigned __int128 ui128;
 
 volatile unsigned int len;
 volatile unsigned char amt;
@@ -111,14 +113,14 @@ void test_core(void) {
 }
 
 void test_integer(void) {
-  vuc = __builtin_s390_vaq(vuc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.vaq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  vuc = __builtin_s390_vacq(vuc, vuc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.vacq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  vuc = __builtin_s390_vaccq(vuc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.vaccq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  vuc = __builtin_s390_vacccq(vuc, vuc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.vacccq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  si128 = __builtin_s390_vaq(si128, si128);
+  // CHECK: call i128 @llvm.s390.vaq(i128 %{{.*}}, i128 %{{.*}})
+  ui128 = __builtin_s390_vacq(ui128, ui128, ui128);
+  // CHECK: call i128 @llvm.s390.vacq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
+  ui128 = __builtin_s390_vaccq(ui128, ui128);
+  // CHECK: call i128 @llvm.s390.vaccq(i128 %{{.*}}, i128 %{{.*}})
+  ui128 = __builtin_s390_vacccq(ui128, ui128, ui128);
+  // CHECK: call i128 @llvm.s390.vacccq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
 
   vuc = __builtin_s390_vaccb(vuc, vuc);
   // CHECK: call <16 x i8> @llvm.s390.vaccb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
@@ -209,17 +211,17 @@ void test_integer(void) {
   // CHECK: call <4 x i32> @llvm.s390.vgfmh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   vul = __builtin_s390_vgfmf(vui, vui);
   // CHECK: call <2 x i64> @llvm.s390.vgfmf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
-  vuc = __builtin_s390_vgfmg(vul, vul);
-  // CHECK: call <16 x i8> @llvm.s390.vgfmg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  ui128 = __builtin_s390_vgfmg(vul, vul);
+  // CHECK: call i128 @llvm.s390.vgfmg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 
   vus = __builtin_s390_vgfmab(vuc, vuc, vus);
   // CHECK: call <8 x i16> @llvm.s390.vgfmab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i16> %{{.*}})
   vui = __builtin_s390_vgfmah(vus, vus, vui);
   // CHECK: call <4 x i32> @llvm.s390.vgfmah(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
   vul = __builtin_s390_vgfmaf(vui, vui, vul);
   // CHECK: call <2 x i64> @llvm.s390.vgfmaf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
-  vuc = __builtin_s390_vgfmag(vul, vul, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.vgfmag(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <16 x i8> %{{.*}})
+  ui128 = __builtin_s390_vgfmag(vul, vul, ui128);
+  // CHECK: call i128 @llvm.s390.vgfmag(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
 
   vsc = __builtin_s390_vmahb(vsc, vsc, vsc);
   // CHECK: call <16 x i8> @llvm.s390.vmahb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
@@ -308,14 +310,14 @@ void test_integer(void) {
   vul = __builtin_s390_vpopctg(vul);
   // CHECK: call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %{{.*}})
 
-  vuc = __builtin_s390_vsq(vuc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.vsq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  vuc = __builtin_s390_vsbiq(vuc, vuc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.vsbiq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  vuc = __builtin_s390_vscbiq(vuc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.vscbiq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  vuc = __builtin_s390_vsbcbiq(vuc, vuc, vuc);
-  // CHECK: call <16 x i8> @llvm.s390.vsbcbiq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  si128 = __builtin_s390_vsq(si128, si128);
+  // CHECK: call i128 @llvm.s390.vsq(i128 %{{.*}}, i128 %{{.*}})
+  ui128 = __builtin_s390_vsbiq(ui128, ui128, ui128);
+  // CHECK: call i128 @llvm.s390.vsbiq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
+  ui128 = __builtin_s390_vscbiq(ui128, ui128);
+  // CHECK: call i128 @llvm.s390.vscbiq(i128 %{{.*}}, i128 %{{.*}})
+  ui128 = __builtin_s390_vsbcbiq(ui128, ui128, ui128);
+  // CHECK: call i128 @llvm.s390.vsbcbiq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
 
   vuc = __builtin_s390_vscbib(vuc, vuc);
   // CHECK: call <16 x i8> @llvm.s390.vscbib(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
@@ -354,10 +356,10 @@ void test_integer(void) {
   // CHECK: call <2 x i64> @llvm.s390.vsumgh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   vul = __builtin_s390_vsumgf(vui, vui);
   // CHECK: call <2 x i64> @llvm.s390.vsumgf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
-  vuc = __builtin_s390_vsumqf(vui, vui);
-  // CHECK: call <16 x i8> @llvm.s390.vsumqf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
-  vuc = __builtin_s390_vsumqg(vul, vul);
-  // CHECK: call <16 x i8> @llvm.s390.vsumqg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  ui128 = __builtin_s390_vsumqf(vui, vui);
+  // CHECK: call i128 @llvm.s390.vsumqf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  ui128 = __builtin_s390_vsumqg(vul, vul);
+  // CHECK: call i128 @llvm.s390.vsumqg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
 
   len = __builtin_s390_vtm(vuc, vuc);
   // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})

diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector2-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector2-error.c
@@ -23,14 +23,15 @@ volatile vec_uint vui;
 volatile vec_ulong vul;
 volatile vec_double vd;
 volatile vec_float vf;
+volatile unsigned __int128 ui128;
 
 volatile unsigned int len;
 int cc;
 
 void test_integer(void) {
-  __builtin_s390_vmslg(vul, vul, vuc, -1);   // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  __builtin_s390_vmslg(vul, vul, vuc, 16);   // expected-error-re {{argument value {{.*}} is outside the valid range}}
-  __builtin_s390_vmslg(vul, vul, vuc, len);  // expected-error {{must be a constant integer}}
+  __builtin_s390_vmslg(vul, vul, ui128, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_s390_vmslg(vul, vul, ui128, 16); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+  __builtin_s390_vmslg(vul, vul, ui128, len);// expected-error {{must be a constant integer}}
 }
 
 void test_float(void) {

diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector2.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector2.c
@@ -23,6 +23,7 @@ volatile vec_uint vui;
 volatile vec_ulong vul;
 volatile vec_double vd;
 volatile vec_float vf;
+volatile unsigned __int128 ui128;
 
 volatile unsigned int len;
 const void * volatile cptr;
@@ -41,10 +42,10 @@ void test_core(void) {
 }
 
 void test_integer(void) {
-  vuc = __builtin_s390_vmslg(vul, vul, vuc, 0);
-  // CHECK: call <16 x i8> @llvm.s390.vmslg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
-  vuc = __builtin_s390_vmslg(vul, vul, vuc, 15);
-  // CHECK: call <16 x i8> @llvm.s390.vmslg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+  ui128 = __builtin_s390_vmslg(vul, vul, ui128, 0);
+  // CHECK: call i128 @llvm.s390.vmslg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}}, i32 0)
+  ui128 = __builtin_s390_vmslg(vul, vul, ui128, 15);
+  // CHECK: call i128 @llvm.s390.vmslg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}}, i32 15)
 }
 
 void test_float(void) {