diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index c1c1f0a1024d0..55aea17d29f55 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -621,6 +621,9 @@ def FeatureZCRegMoveGPR64 : SubtargetFeature<"zcm-gpr64", "HasZeroCycleRegMoveGP def FeatureZCRegMoveGPR32 : SubtargetFeature<"zcm-gpr32", "HasZeroCycleRegMoveGPR32", "true", "Has zero-cycle register moves for GPR32 registers">; +def FeatureZCRegMoveFPR128 : SubtargetFeature<"zcm-fpr128", "HasZeroCycleRegMoveFPR128", "true", + "Has zero-cycle register moves for FPR128 registers">; + def FeatureZCRegMoveFPR64 : SubtargetFeature<"zcm-fpr64", "HasZeroCycleRegMoveFPR64", "true", "Has zero-cycle register moves for FPR64 registers">; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index d15f90deba74e..103e56a83a5de 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5318,15 +5318,49 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR64RegClass.contains(DestReg) && AArch64::FPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + if (Subtarget.hasZeroCycleRegMoveFPR128() && + !Subtarget.hasZeroCycleRegMoveFPR64() && + !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub, + &AArch64::FPR128RegClass); + // This instruction is reading and writing Q registers. This may upset + // the register scavenger and machine verifier, so we need to indicate + // that we are reading an undefined value from SrcRegQ, but a proper + // value from SrcReg. 
+ BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ) + .addReg(SrcRegQ, RegState::Undef) + .addReg(SrcRegQ, RegState::Undef) + .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); + } else { + BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } return; } if (AArch64::FPR32RegClass.contains(DestReg) && AArch64::FPR32RegClass.contains(SrcReg)) { - if (Subtarget.hasZeroCycleRegMoveFPR64() && - !Subtarget.hasZeroCycleRegMoveFPR32()) { + if (Subtarget.hasZeroCycleRegMoveFPR128() && + !Subtarget.hasZeroCycleRegMoveFPR64() && + !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub, + &AArch64::FPR128RegClass); + // This instruction is reading and writing Q registers. This may upset + // the register scavenger and machine verifier, so we need to indicate + // that we are reading an undefined value from SrcRegQ, but a proper + // value from SrcReg. 
+ BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ) + .addReg(SrcRegQ, RegState::Undef) + .addReg(SrcRegQ, RegState::Undef) + .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); + } else if (Subtarget.hasZeroCycleRegMoveFPR64() && + !Subtarget.hasZeroCycleRegMoveFPR32()) { const TargetRegisterInfo *TRI = &getRegisterInfo(); MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub, &AArch64::FPR64RegClass); @@ -5348,8 +5382,24 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR16RegClass.contains(DestReg) && AArch64::FPR16RegClass.contains(SrcReg)) { - if (Subtarget.hasZeroCycleRegMoveFPR64() && - !Subtarget.hasZeroCycleRegMoveFPR32()) { + if (Subtarget.hasZeroCycleRegMoveFPR128() && + !Subtarget.hasZeroCycleRegMoveFPR64() && + !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR128RegClass); + // This instruction is reading and writing Q registers. This may upset + // the register scavenger and machine verifier, so we need to indicate + // that we are reading an undefined value from SrcRegQ, but a proper + // value from SrcReg. 
+ BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ) + .addReg(SrcRegQ, RegState::Undef) + .addReg(SrcRegQ, RegState::Undef) + .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); + } else if (Subtarget.hasZeroCycleRegMoveFPR64() && + !Subtarget.hasZeroCycleRegMoveFPR32()) { const TargetRegisterInfo *TRI = &getRegisterInfo(); MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR64RegClass); @@ -5375,8 +5425,24 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR8RegClass.contains(DestReg) && AArch64::FPR8RegClass.contains(SrcReg)) { - if (Subtarget.hasZeroCycleRegMoveFPR64() && - !Subtarget.hasZeroCycleRegMoveFPR32()) { + if (Subtarget.hasZeroCycleRegMoveFPR128() && + !Subtarget.hasZeroCycleRegMoveFPR64() && + !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR128RegClass); + // This instruction is reading and writing Q registers. This may upset + // the register scavenger and machine verifier, so we need to indicate + // that we are reading an undefined value from SrcRegQ, but a proper + // value from SrcReg. 
+ BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ) + .addReg(SrcRegQ, RegState::Undef) + .addReg(SrcRegQ, RegState::Undef) + .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); + } else if (Subtarget.hasZeroCycleRegMoveFPR64() && + !Subtarget.hasZeroCycleRegMoveFPR32()) { const TargetRegisterInfo *TRI = &getRegisterInfo(); MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR64RegClass); diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index 42eaeca906e66..b7e08dbe7c792 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -321,6 +321,7 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7", FeatureFuseAES, FeatureFuseCryptoEOR, FeatureStorePairSuppress, FeatureZCRegMoveGPR64, + FeatureZCRegMoveFPR128, FeatureZCZeroing, FeatureZCZeroingFPWorkaround]>; @@ -334,6 +335,7 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", FeatureFuseCryptoEOR, FeatureStorePairSuppress, FeatureZCRegMoveGPR64, + FeatureZCRegMoveFPR128, FeatureZCZeroing]>; def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", @@ -346,6 +348,7 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", FeatureFuseCryptoEOR, FeatureStorePairSuppress, FeatureZCRegMoveGPR64, + FeatureZCRegMoveFPR128, FeatureZCZeroing]>; def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", @@ -358,6 +361,7 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", FeatureFuseCryptoEOR, FeatureStorePairSuppress, FeatureZCRegMoveGPR64, + FeatureZCRegMoveFPR128, FeatureZCZeroing]>; def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", @@ -370,6 +374,7 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", FeatureFuseCryptoEOR, FeatureStorePairSuppress, FeatureZCRegMoveGPR64, + 
FeatureZCRegMoveFPR128, FeatureZCZeroing]>; def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", @@ -387,6 +392,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", FeatureFuseLiterals, FeatureStorePairSuppress, FeatureZCRegMoveGPR64, + FeatureZCRegMoveFPR128, FeatureZCZeroing]>; def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15", @@ -404,6 +410,7 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15", FeatureFuseLiterals, FeatureStorePairSuppress, FeatureZCRegMoveGPR64, + FeatureZCRegMoveFPR128, FeatureZCZeroing]>; def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16", @@ -421,6 +428,7 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16", FeatureFuseLiterals, FeatureStorePairSuppress, FeatureZCRegMoveGPR64, + FeatureZCRegMoveFPR128, FeatureZCZeroing]>; def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17", @@ -438,6 +446,7 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17", FeatureFuseLiterals, FeatureStorePairSuppress, FeatureZCRegMoveGPR64, + FeatureZCRegMoveFPR128, FeatureZCZeroing]>; def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4", @@ -454,6 +463,7 @@ def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4", FeatureFuseCryptoEOR, FeatureFuseLiterals, FeatureZCRegMoveGPR64, + FeatureZCRegMoveFPR128, FeatureZCZeroing ]>; diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-fpr.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-fpr.ll index fa15ab42c2638..a0f1b719372b3 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-fpr.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-fpr.ll @@ -1,33 +1,84 @@ -; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOTCPU-LINUX --match-full-lines -; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s 
-check-prefixes=NOTCPU-APPLE --match-full-lines -; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-fpr64 | FileCheck %s -check-prefixes=ATTR --match-full-lines +; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOZCM-FPR128-CPU --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOZCM-FPR128-CPU --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=ZCM-FPR128-CPU --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-fpr128 | FileCheck %s -check-prefixes=NOZCM-FPR128-ATTR --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-fpr128 | FileCheck %s -check-prefixes=ZCM-FPR128-ATTR --match-full-lines + +define void @zero_cycle_regmov_FPR64(double %a, double %b, double %c, double %d) { +entry: +; CHECK-LABEL: t: +; NOZCM-FPR128-CPU: fmov d0, d2 +; NOZCM-FPR128-CPU: fmov d1, d3 +; NOZCM-FPR128-CPU: fmov [[REG2:d[0-9]+]], d3 +; NOZCM-FPR128-CPU: fmov [[REG1:d[0-9]+]], d2 +; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_double}} +; NOZCM-FPR128-CPU: fmov d0, [[REG1]] +; NOZCM-FPR128-CPU: fmov d1, [[REG2]] + +; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3 +; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2 +; ZCM-FPR128-CPU: mov.16b v0, v2 +; ZCM-FPR128-CPU: mov.16b v1, v3 +; ZCM-FPR128-CPU-NEXT: bl {{_?foo_double}} +; ZCM-FPR128-CPU: mov.16b v0, [[REG1]] +; ZCM-FPR128-CPU: mov.16b v1, [[REG2]] + +; NOZCM-FPR128-ATTR: fmov [[REG2:d[0-9]+]], d3 +; NOZCM-FPR128-ATTR: fmov [[REG1:d[0-9]+]], d2 +; NOZCM-FPR128-ATTR: fmov d0, d2 +; NOZCM-FPR128-ATTR: fmov d1, d3 +; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_double}} +; NOZCM-FPR128-ATTR: fmov d0, [[REG1]] +; NOZCM-FPR128-ATTR: fmov d1, [[REG2]] + +; ZCM-FPR128-ATTR: mov.16b v0, v2 +; ZCM-FPR128-ATTR: mov.16b v1, v3 +; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3 +; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2 +; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_double}} 
+; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]] +; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]] + %call = call double @foo_double(double %c, double %d) + %call1 = call double @foo_double(double %c, double %d) + unreachable +} + +declare double @foo_double(double, double) define void @zero_cycle_regmov_FPR32(float %a, float %b, float %c, float %d) { entry: ; CHECK-LABEL: t: -; NOTCPU-LINUX: fmov s0, s2 -; NOTCPU-LINUX: fmov s1, s3 -; NOTCPU-LINUX: fmov [[REG2:s[0-9]+]], s3 -; NOTCPU-LINUX: fmov [[REG1:s[0-9]+]], s2 -; NOTCPU-LINUX-NEXT: bl {{_?foo_float}} -; NOTCPU-LINUX: fmov s0, [[REG1]] -; NOTCPU-LINUX: fmov s1, [[REG2]] +; NOZCM-FPR128-CPU: fmov s0, s2 +; NOZCM-FPR128-CPU: fmov s1, s3 +; NOZCM-FPR128-CPU: fmov [[REG2:s[0-9]+]], s3 +; NOZCM-FPR128-CPU: fmov [[REG1:s[0-9]+]], s2 +; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_float}} +; NOZCM-FPR128-CPU: fmov s0, [[REG1]] +; NOZCM-FPR128-CPU: fmov s1, [[REG2]] -; NOTCPU-APPLE: fmov s0, s2 -; NOTCPU-APPLE: fmov s1, s3 -; NOTCPU-APPLE: fmov [[REG2:s[0-9]+]], s3 -; NOTCPU-APPLE: fmov [[REG1:s[0-9]+]], s2 -; NOTCPU-APPLE-NEXT: bl {{_?foo_float}} -; NOTCPU-APPLE: fmov s0, [[REG1]] -; NOTCPU-APPLE: fmov s1, [[REG2]] +; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3 +; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2 +; ZCM-FPR128-CPU: mov.16b v0, v2 +; ZCM-FPR128-CPU: mov.16b v1, v3 +; ZCM-FPR128-CPU-NEXT: bl {{_?foo_float}} +; ZCM-FPR128-CPU: mov.16b v0, [[REG1]] +; ZCM-FPR128-CPU: mov.16b v1, [[REG2]] -; ATTR: fmov d0, d2 -; ATTR: fmov d1, d3 -; ATTR: fmov [[REG2:d[0-9]+]], d3 -; ATTR: fmov [[REG1:d[0-9]+]], d2 -; ATTR-NEXT: bl {{_?foo_float}} -; ATTR: fmov d0, [[REG1]] -; ATTR: fmov d1, [[REG2]] +; NOZCM-FPR128-ATTR: fmov [[REG2:s[0-9]+]], s3 +; NOZCM-FPR128-ATTR: fmov [[REG1:s[0-9]+]], s2 +; NOZCM-FPR128-ATTR: fmov s0, s2 +; NOZCM-FPR128-ATTR: fmov s1, s3 +; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_float}} +; NOZCM-FPR128-ATTR: fmov s0, [[REG1]] +; NOZCM-FPR128-ATTR: fmov s1, [[REG2]] + +; ZCM-FPR128-ATTR: mov.16b v0, v2 +; ZCM-FPR128-ATTR: mov.16b v1, 
v3 +; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3 +; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2 +; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_float}} +; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]] +; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]] %call = call float @foo_float(float %c, float %d) %call1 = call float @foo_float(float %c, float %d) unreachable @@ -38,29 +89,37 @@ declare float @foo_float(float, float) define void @zero_cycle_regmov_FPR16(half %a, half %b, half %c, half %d) { entry: ; CHECK-LABEL: t: -; NOTCPU-LINUX: fmov s0, s2 -; NOTCPU-LINUX: fmov s1, s3 -; NOTCPU-LINUX: fmov [[REG2:s[0-9]+]], s3 -; NOTCPU-LINUX: fmov [[REG1:s[0-9]+]], s2 -; NOTCPU-LINUX-NEXT: bl {{_?foo_half}} -; NOTCPU-LINUX: fmov s0, [[REG1]] -; NOTCPU-LINUX: fmov s1, [[REG2]] +; NOZCM-FPR128-CPU: fmov s0, s2 +; NOZCM-FPR128-CPU: fmov s1, s3 +; NOZCM-FPR128-CPU: fmov [[REG2:s[0-9]+]], s3 +; NOZCM-FPR128-CPU: fmov [[REG1:s[0-9]+]], s2 +; NOZCM-FPR128-CPU-NEXT: bl {{_?foo_half}} +; NOZCM-FPR128-CPU: fmov s0, [[REG1]] +; NOZCM-FPR128-CPU: fmov s1, [[REG2]] + +; ZCM-FPR128-CPU: mov.16b [[REG2:v[0-9]+]], v3 +; ZCM-FPR128-CPU: mov.16b [[REG1:v[0-9]+]], v2 +; ZCM-FPR128-CPU: mov.16b v0, v2 +; ZCM-FPR128-CPU: mov.16b v1, v3 +; ZCM-FPR128-CPU-NEXT: bl {{_?foo_half}} +; ZCM-FPR128-CPU: mov.16b v0, [[REG1]] +; ZCM-FPR128-CPU: mov.16b v1, [[REG2]] -; NOTCPU-APPLE: fmov s0, s2 -; NOTCPU-APPLE: fmov s1, s3 -; NOTCPU-APPLE: fmov [[REG2:s[0-9]+]], s3 -; NOTCPU-APPLE: fmov [[REG1:s[0-9]+]], s2 -; NOTCPU-APPLE-NEXT: bl {{_?foo_half}} -; NOTCPU-APPLE: fmov s0, [[REG1]] -; NOTCPU-APPLE: fmov s1, [[REG2]] +; NOZCM-FPR128-ATTR: fmov [[REG2:s[0-9]+]], s3 +; NOZCM-FPR128-ATTR: fmov [[REG1:s[0-9]+]], s2 +; NOZCM-FPR128-ATTR: fmov s0, s2 +; NOZCM-FPR128-ATTR: fmov s1, s3 +; NOZCM-FPR128-ATTR-NEXT: bl {{_?foo_half}} +; NOZCM-FPR128-ATTR: fmov s0, [[REG1]] +; NOZCM-FPR128-ATTR: fmov s1, [[REG2]] -; ATTR: fmov d0, d2 -; ATTR: fmov d1, d3 -; ATTR: fmov [[REG2:d[0-9]+]], d3 -; ATTR: fmov [[REG1:d[0-9]+]], d2 -; ATTR-NEXT: bl 
{{_?foo_half}} -; ATTR: fmov d0, [[REG1]] -; ATTR: fmov d1, [[REG2]] +; ZCM-FPR128-ATTR: mov.16b v0, v2 +; ZCM-FPR128-ATTR: mov.16b v1, v3 +; ZCM-FPR128-ATTR: mov.16b [[REG2:v[0-9]+]], v3 +; ZCM-FPR128-ATTR: mov.16b [[REG1:v[0-9]+]], v2 +; ZCM-FPR128-ATTR-NEXT: bl {{_?foo_half}} +; ZCM-FPR128-ATTR: mov.16b v0, [[REG1]] +; ZCM-FPR128-ATTR: mov.16b v1, [[REG2]] %call = call half @foo_half(half %c, half %d) %call1 = call half @foo_half(half %c, half %d) unreachable