diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1126bd5b73727..11e8e4adc6f0a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1136,7 +1136,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Legalize unpacked bitcasts to REINTERPRET_CAST. for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16, - MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32}) + MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32}) setOperationAction(ISD::BITCAST, VT, Custom); for (auto VT : diff --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll index bab42f3899170..7b7600b70ce76 100644 --- a/llvm/test/CodeGen/AArch64/sve-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll @@ -1,519 +1,1031 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s -; RUN: not --crash llc -mtriple=aarch64_be -mattr=+sve < %s +; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: not --crash llc -mtriple=aarch64_be < %s -define @bitcast_i16_to_i8( %v) { -; CHECK-LABEL: bitcast_i16_to_i8: +; +; bitcast to nxv16i8 +; + +define @bitcast_nxv8i16_to_nxv16i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i16_to_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i32_to_i8( %v) { -; CHECK-LABEL: bitcast_i32_to_i8: +define @bitcast_nxv4i32_to_nxv16i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i32_to_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i64_to_i8( %v) { -; CHECK-LABEL: bitcast_i64_to_i8: +define @bitcast_nxv2i64_to_nxv16i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i64_to_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_half_to_i8( %v) { -; CHECK-LABEL: bitcast_half_to_i8: +define @bitcast_nxv8f16_to_nxv16i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv8f16_to_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_float_to_i8( %v) { -; CHECK-LABEL: bitcast_float_to_i8: +define @bitcast_nxv4f32_to_nxv16i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f32_to_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_double_to_i8( %v) { -; CHECK-LABEL: bitcast_double_to_i8: +define @bitcast_nxv2f64_to_nxv16i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f64_to_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i8_to_i16( %v) { -; CHECK-LABEL: bitcast_i8_to_i16: +define @bitcast_nxv8bf16_to_nxv16i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv8bf16_to_nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv8i16 +; + +define @bitcast_nxv16i8_to_nxv8i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv16i8_to_nxv8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i32_to_i16( %v) { -; CHECK-LABEL: bitcast_i32_to_i16: +define @bitcast_nxv4i32_to_nxv8i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i32_to_nxv8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i64_to_i16( %v) { -; CHECK-LABEL: bitcast_i64_to_i16: +define @bitcast_nxv2i64_to_nxv8i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i64_to_nxv8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_half_to_i16( %v) { -; CHECK-LABEL: bitcast_half_to_i16: +define @bitcast_nxv8f16_to_nxv8i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv8f16_to_nxv8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_float_to_i16( %v) { -; CHECK-LABEL: bitcast_float_to_i16: +define @bitcast_nxv4f32_to_nxv8i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f32_to_nxv8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_double_to_i16( %v) { -; CHECK-LABEL: bitcast_double_to_i16: +define @bitcast_nxv2f64_to_nxv8i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f64_to_nxv8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i8_to_i32( %v) { -; CHECK-LABEL: bitcast_i8_to_i32: +define @bitcast_nxv8bf16_to_nxv8i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv8bf16_to_nxv8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv4i32 +; + +define @bitcast_nxv16i8_to_nxv4i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv16i8_to_nxv4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i16_to_i32( %v) { -; CHECK-LABEL: bitcast_i16_to_i32: +define @bitcast_nxv8i16_to_nxv4i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i16_to_nxv4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i64_to_i32( %v) { -; CHECK-LABEL: bitcast_i64_to_i32: +define @bitcast_nxv2i64_to_nxv4i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i64_to_nxv4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_half_to_i32( %v) { -; CHECK-LABEL: bitcast_half_to_i32: +define @bitcast_nxv8f16_to_nxv4i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv8f16_to_nxv4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_float_to_i32( %v) { -; CHECK-LABEL: bitcast_float_to_i32: +define @bitcast_nxv4f32_to_nxv4i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f32_to_nxv4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_double_to_i32( %v) { -; CHECK-LABEL: bitcast_double_to_i32: +define @bitcast_nxv2f64_to_nxv4i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f64_to_nxv4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i8_to_i64( %v) { -; CHECK-LABEL: bitcast_i8_to_i64: +define @bitcast_nxv8bf16_to_nxv4i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv8bf16_to_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv2i64 +; + +define @bitcast_nxv16i8_to_nxv2i64( %v) #0 { +; CHECK-LABEL: bitcast_nxv16i8_to_nxv2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i16_to_i64( %v) { -; CHECK-LABEL: bitcast_i16_to_i64: +define @bitcast_nxv8i16_to_nxv2i64( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i16_to_nxv2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i32_to_i64( %v) { -; CHECK-LABEL: bitcast_i32_to_i64: +define @bitcast_nxv4i32_to_nxv2i64( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i32_to_nxv2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_half_to_i64( %v) { -; CHECK-LABEL: bitcast_half_to_i64: +define @bitcast_nxv8f16_to_nxv2i64( %v) #0 { +; CHECK-LABEL: bitcast_nxv8f16_to_nxv2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_float_to_i64( %v) { -; CHECK-LABEL: bitcast_float_to_i64: +define @bitcast_nxv4f32_to_nxv2i64( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f32_to_nxv2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_double_to_i64( %v) { -; CHECK-LABEL: bitcast_double_to_i64: +define @bitcast_nxv2f64_to_nxv2i64( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f64_to_nxv2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i8_to_half( %v) { -; CHECK-LABEL: bitcast_i8_to_half: +define @bitcast_nxv8bf16_to_nxv2i64( %v) #0 { +; CHECK-LABEL: bitcast_nxv8bf16_to_nxv2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv8f16 +; + +define @bitcast_nxv16i8_to_nxv8f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv16i8_to_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i16_to_half( %v) { -; CHECK-LABEL: bitcast_i16_to_half: +define @bitcast_nxv8i16_to_nxv8f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i16_to_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i32_to_half( %v) { -; CHECK-LABEL: bitcast_i32_to_half: +define @bitcast_nxv4i32_to_nxv8f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i32_to_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i64_to_half( %v) { -; CHECK-LABEL: bitcast_i64_to_half: +define @bitcast_nxv2i64_to_nxv8f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i64_to_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_float_to_half( %v) { -; CHECK-LABEL: bitcast_float_to_half: +define @bitcast_nxv4f32_to_nxv8f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f32_to_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_double_to_half( %v) { -; CHECK-LABEL: bitcast_double_to_half: +define @bitcast_nxv2f64_to_nxv8f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f64_to_nxv8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i8_to_float( %v) { -; CHECK-LABEL: bitcast_i8_to_float: +define @bitcast_nxv8bf16_to_nxv8f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv8bf16_to_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv4f32 +; + +define @bitcast_nxv16i8_to_nxv4f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv16i8_to_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i16_to_float( %v) { -; CHECK-LABEL: bitcast_i16_to_float: +define @bitcast_nxv8i16_to_nxv4f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i16_to_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i32_to_float( %v) { -; CHECK-LABEL: bitcast_i32_to_float: +define @bitcast_nxv4i32_to_nxv4f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i32_to_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i64_to_float( %v) { -; CHECK-LABEL: bitcast_i64_to_float: +define @bitcast_nxv2i64_to_nxv4f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i64_to_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_half_to_float( %v) { -; CHECK-LABEL: bitcast_half_to_float: +define @bitcast_nxv8f16_to_nxv4f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv8f16_to_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_double_to_float( %v) { -; CHECK-LABEL: bitcast_double_to_float: +define @bitcast_nxv2f64_to_nxv4f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f64_to_nxv4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i8_to_double( %v) { -; CHECK-LABEL: bitcast_i8_to_double: +define @bitcast_nxv8bf16_to_nxv4f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv8bf16_to_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv2f64 +; + +define @bitcast_nxv16i8_to_nxv2f64( %v) #0 { +; CHECK-LABEL: bitcast_nxv16i8_to_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i16_to_double( %v) { -; CHECK-LABEL: bitcast_i16_to_double: +define @bitcast_nxv8i16_to_nxv2f64( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i16_to_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i32_to_double( %v) { -; CHECK-LABEL: bitcast_i32_to_double: +define @bitcast_nxv4i32_to_nxv2f64( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i32_to_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_i64_to_double( %v) { -; CHECK-LABEL: bitcast_i64_to_double: +define @bitcast_nxv2i64_to_nxv2f64( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i64_to_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_half_to_double( %v) { -; CHECK-LABEL: bitcast_half_to_double: +define @bitcast_nxv8f16_to_nxv2f64( %v) #0 { +; CHECK-LABEL: bitcast_nxv8f16_to_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_float_to_double( %v) { -; CHECK-LABEL: bitcast_float_to_double: +define @bitcast_nxv4f32_to_nxv2f64( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f32_to_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_bfloat_to_i8( %v) #0 { -; CHECK-LABEL: bitcast_bfloat_to_i8: +define @bitcast_nxv8bf16_to_nxv2f64( %v) #0 { +; CHECK-LABEL: bitcast_nxv8bf16_to_nxv2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_bfloat_to_i16( %v) #0 { -; CHECK-LABEL: bitcast_bfloat_to_i16: +; +; bitcast to nxv8bf16 +; + +define @bitcast_nxv16i8_to_nxv8bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv16i8_to_nxv8bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_bfloat_to_i32( %v) #0 { -; CHECK-LABEL: bitcast_bfloat_to_i32: +define @bitcast_nxv8i16_to_nxv8bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i16_to_nxv8bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_bfloat_to_i64( %v) #0 { -; CHECK-LABEL: bitcast_bfloat_to_i64: +define @bitcast_nxv4i32_to_nxv8bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i32_to_nxv8bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_bfloat_to_half( %v) #0 { -; CHECK-LABEL: bitcast_bfloat_to_half: +define @bitcast_nxv2i64_to_nxv8bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i64_to_nxv8bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_bfloat_to_float( %v) #0 { -; CHECK-LABEL: bitcast_bfloat_to_float: +define @bitcast_nxv8f16_to_nxv8bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv8f16_to_nxv8bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_bfloat_to_double( %v) #0 { -; CHECK-LABEL: bitcast_bfloat_to_double: +define @bitcast_nxv4f32_to_nxv8bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f32_to_nxv8bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_i8_to_bfloat( %v) #0 { -; CHECK-LABEL: bitcast_i8_to_bfloat: +define @bitcast_nxv2f64_to_nxv8bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f64_to_nxv8bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %bc = bitcast %v to + %bc = bitcast %v to ret %bc } -define @bitcast_i16_to_bfloat( %v) #0 { -; CHECK-LABEL: bitcast_i16_to_bfloat: +; +; bitcast to nxv8i8 +; + +define @bitcast_nxv4i16_to_nxv8i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i16_to_nxv8i8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_i32_to_bfloat( %v) #0 { -; CHECK-LABEL: bitcast_i32_to_bfloat: +define @bitcast_nxv2i32_to_nxv8i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i32_to_nxv8i8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_i64_to_bfloat( %v) #0 { -; CHECK-LABEL: bitcast_i64_to_bfloat: +define @bitcast_nxv4f16_to_nxv8i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f16_to_nxv8i8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_half_to_bfloat( %v) #0 { -; CHECK-LABEL: bitcast_half_to_bfloat: +define @bitcast_nxv2f32_to_nxv8i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f32_to_nxv8i8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_float_to_bfloat( %v) #0 { -; CHECK-LABEL: bitcast_float_to_bfloat: +define @bitcast_nxv4bf16_to_nxv8i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv4bf16_to_nxv8i8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_double_to_bfloat( %v) #0 { -; CHECK-LABEL: bitcast_double_to_bfloat: +; +; bitcast to nxv4i16 +; + +define @bitcast_nxv8i8_to_nxv4i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i8_to_nxv4i16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_short2_half_to_i16( %v) { -; CHECK-LABEL: bitcast_short2_half_to_i16: +define @bitcast_nxv2i32_to_nxv4i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i32_to_nxv4i16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_short4_half_to_i16( %v) { -; CHECK-LABEL: bitcast_short4_half_to_i16: +define @bitcast_nxv4f16_to_nxv4i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f16_to_nxv4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_short2_bfloat_to_i16( %v) #0 { -; CHECK-LABEL: bitcast_short2_bfloat_to_i16: +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. +define @bitcast_nxv2f32_to_nxv4i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f32_to_nxv4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_short4_bfloat_to_i16( %v) #0 { -; CHECK-LABEL: bitcast_short4_bfloat_to_i16: +define @bitcast_nxv4bf16_to_nxv4i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4bf16_to_nxv4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_short2_i16_to_half( %v) { -; CHECK-LABEL: bitcast_short2_i16_to_half: +; +; bitcast to nxv2i32 +; + +define @bitcast_nxv8i8_to_nxv2i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i8_to_nxv2i32: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc } -define @bitcast_short4_i16_to_half( %v) { -; CHECK-LABEL: bitcast_short4_i16_to_half: +define @bitcast_nxv4i16_to_nxv2i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i16_to_nxv2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. +define @bitcast_nxv4f16_to_nxv2i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f16_to_nxv2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv2f32_to_nxv2i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f32_to_nxv2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. +define @bitcast_nxv4bf16_to_nxv2i32( %v) #0 { +; CHECK-LABEL: bitcast_nxv4bf16_to_nxv2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv4f16 +; + +define @bitcast_nxv8i8_to_nxv4f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i8_to_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv4i16_to_nxv4f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i16_to_nxv4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } -define @bitcast_short2_i16_to_bfloat( %v) #0 { -; CHECK-LABEL: bitcast_short2_i16_to_bfloat: +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. +define @bitcast_nxv2i32_to_nxv4f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i32_to_nxv4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret - %bc = bitcast %v to - ret %bc + %bc = bitcast %v to + ret %bc +} + +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. +define @bitcast_nxv2f32_to_nxv4f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f32_to_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv4bf16_to_nxv4f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4bf16_to_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv2f32 +; + +define @bitcast_nxv8i8_to_nxv2f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i8_to_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc } -define @bitcast_short4_i16_to_bfloat( %v) #0 { -; CHECK-LABEL: bitcast_short4_i16_to_bfloat: +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. +define @bitcast_nxv4i16_to_nxv2f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i16_to_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv2i32_to_nxv2f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i32_to_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. +define @bitcast_nxv4f16_to_nxv2f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f16_to_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. +define @bitcast_nxv4bf16_to_nxv2f32( %v) #0 { +; CHECK-LABEL: bitcast_nxv4bf16_to_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv4bf16 +; + +define @bitcast_nxv8i8_to_nxv4bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv8i8_to_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv4i16_to_nxv4bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i16_to_nxv4bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %bc = bitcast %v to ret %bc } +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. +define @bitcast_nxv2i32_to_nxv4bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i32_to_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv4f16_to_nxv4bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4f16_to_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. +define @bitcast_nxv2f32_to_nxv4bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f32_to_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv4i8 +; + +define @bitcast_nxv2i16_to_nxv4i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i16_to_nxv4i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1b { z0.s }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv2f16_to_nxv4i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f16_to_nxv4i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1b { z0.s }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv2bf16_to_nxv4i8( %v) #0 { +; CHECK-LABEL: bitcast_nxv2bf16_to_nxv4i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1b { z0.s }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv2i16 +; + +define @bitcast_nxv4i8_to_nxv2i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i8_to_nxv2i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1b { z0.s }, p0, [sp, #3, mul vl] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv2f16_to_nxv2i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f16_to_nxv2i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv2bf16_to_nxv2i16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2bf16_to_nxv2i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv2f16 +; + +define @bitcast_nxv4i8_to_nxv2f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i8_to_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1b { z0.s }, p0, [sp, #3, mul vl] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv2i16_to_nxv2f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i16_to_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv2bf16_to_nxv2f16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2bf16_to_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; bitcast to nxv2bf16 +; + +define @bitcast_nxv4i8_to_nxv2bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv4i8_to_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1b { z0.s }, p0, [sp, #3, mul vl] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv2i16_to_nxv2bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2i16_to_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_nxv2f16_to_nxv2bf16( %v) #0 { +; CHECK-LABEL: bitcast_nxv2f16_to_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +; Other +; + define @bitcast_short_float_to_i32( %v) #0 { ; CHECK-LABEL: bitcast_short_float_to_i32: ; CHECK: // %bb.0: @@ -537,6 +1049,8 @@ define @bitcast_short_i32_to_float( %v) ret %extended } +; TODO: Invalid code generation because the bitcast must change the in-register +; layout when casting between unpacked scalable vector types. define @bitcast_short_half_to_float( %v) #0 { ; CHECK-LABEL: bitcast_short_half_to_float: ; CHECK: // %bb.0: @@ -549,4 +1063,4 @@ define @bitcast_short_half_to_float( %v) } ; +bf16 is required for the bfloat version. -attributes #0 = { "target-features"="+sve,+bf16" } +attributes #0 = { nounwind "target-features"="+sve,+bf16" }