diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 14c33b5190f5e..5e4ec632ee5a4 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -162,13 +162,25 @@ static SPIRVTypeInst getArgSPIRVType(const Function &F, unsigned ArgIdx, Type *OriginalArgType = SPIRV::getOriginalFunctionType(F)->getParamType(ArgIdx); + // Vector of untyped pointers: build with the deduced pointee instead of + // the default i8 (mismatches typed uses downstream). + Argument *Arg = F.getArg(ArgIdx); + if (auto *VTy = dyn_cast(OriginalArgType); + VTy && isUntypedPointerTy(VTy->getElementType())) + if (Type *ElemTy = GR->findDeducedElementType(Arg)) + return GR->getOrCreateSPIRVVectorType( + GR->getOrCreateSPIRVPointerType( + ElemTy, MIRBuilder, + addressSpaceToStorageClass( + getPointerAddressSpace(OriginalArgType), ST)), + VTy->getNumElements(), MIRBuilder, true); + // If OriginalArgType is non-pointer, use the OriginalArgType (the type cannot // be legally reassigned later). if (!isPointerTy(OriginalArgType)) return GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder, ArgAccessQual, true); - Argument *Arg = F.getArg(ArgIdx); Type *ArgType = Arg->getType(); if (isTypedPointerTy(ArgType)) { return GR->getOrCreateSPIRVPointerType( diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index b6e71c7b76348..9e2f700260c75 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -1864,6 +1864,63 @@ Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) { IRBuilder<> B(I.getParent()); B.SetInsertPoint(&I); + // OpPtrAccessChain requires a scalar pointer result; scalarize per-lane + // GEPs that return and rebuild the vector via insertelement. 
+ if (auto *RetVTy = dyn_cast(I.getType())) { + unsigned N = RetVTy->getNumElements(); + Value *PtrOp = I.getPointerOperand(); + bool PtrIsVec = isa(PtrOp->getType()); + Type *ResultPtrTy = RetVTy->getElementType(); + Type *ScalarPtrTy = PtrOp->getType()->getScalarType(); + SmallVector GepTypes = {ResultPtrTy, ScalarPtrTy}; + Value *InBounds = B.getInt1(I.isInBounds()); + Type *LanePointeeTy = getGEPType(&I); + Type *SrcElemTy = I.getSourceElementType(); + + // Pin the lane pointee type on the vector operand and on each extracted + // lane so the prelegalizer wraps them as OpTypeVector/OpTypePointer of + // the right element type instead of defaulting to i8. + if (PtrIsVec) + GR->buildAssignPtr(B, SrcElemTy, PtrOp); + + Value *VecResult = PoisonValue::get(RetVTy); + for (unsigned Lane = 0; Lane < N; ++Lane) { + Value *LaneIdx = B.getInt32(Lane); + Value *ScalarPtr = PtrOp; + if (PtrIsVec) { + SmallVector ExtractTypes = {ScalarPtrTy, PtrOp->getType(), + LaneIdx->getType()}; + ScalarPtr = B.CreateIntrinsic(Intrinsic::spv_extractelt, {ExtractTypes}, + {PtrOp, LaneIdx}); + GR->buildAssignPtr(B, SrcElemTy, ScalarPtr); + } + SmallVector Args; + Args.push_back(InBounds); + Args.push_back(ScalarPtr); + for (Value *Idx : I.indices()) { + if (isa(Idx->getType())) + Args.push_back(B.CreateExtractElement(Idx, LaneIdx)); + else + Args.push_back(Idx); + } + Value *ScalarGep = B.CreateIntrinsic(Intrinsic::spv_gep, GepTypes, Args); + GR->buildAssignPtr(B, LanePointeeTy, ScalarGep); + VecResult = B.CreateInsertElement(VecResult, ScalarGep, LaneIdx); + } + + auto *NewI = cast(VecResult); + replaceAllUsesWithAndErase(B, &I, NewI); + + if (CallInst *Old = GR->findAssignPtrTypeInstr(NewI)) { + Old->eraseFromParent(); + GR->addAssignPtrTypeInstr(NewI, nullptr); + } + setInsertPointAfterDef(B, NewI); + GR->buildAssignPtr(B, LanePointeeTy, NewI); + + return NewI; + } + if (TM.getSubtargetImpl()->isLogicalSPIRV() && !isFirstIndexZero(&I)) { // Logical SPIR-V cannot use the OpPtrAccessChain 
instruction. If the first // index of the GEP is not 0, then we need to try to adjust it. @@ -3038,6 +3095,19 @@ void SPIRVEmitIntrinsics::processParamTypesByFunHeader(Function *F, B.SetInsertPointPastAllocas(F); for (unsigned OpIdx = 0; OpIdx < F->arg_size(); ++OpIdx) { Argument *Arg = F->getArg(OpIdx); + // Vector-of-pointers arg: deduce pointee from a GEP user so the function + // type isn't emitted with the default i8 pointee. + if (isUntypedPointerVectorTy(Arg->getType()) && + !GR->findDeducedElementType(Arg)) { + for (User *U : Arg->users()) { + auto *GEP = dyn_cast(U); + if (GEP && GEP->getPointerOperand() == Arg) { + GR->buildAssignPtr(B, GEP->getSourceElementType(), Arg); + break; + } + } + continue; + } if (!isUntypedPointerTy(Arg->getType())) continue; Type *ElemTy = GR->findDeducedElementType(Arg); diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 75cc7133f1766..3fa9f27d5d601 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -579,6 +579,13 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, SPIRVTypeInst AssignedPtrType = GR->getOrCreateSPIRVPointerType( ElementTy, MI, addressSpaceToStorageClass(MI.getOperand(3).getImm(), *ST)); + // The intrinsic also carries vector-of-pointer values produced by + // scalarized vector GEPs; wrap the pointer in OpTypeVector to match + // the vreg's LLT. + LLT RegTy = MRI.getType(Reg); + if (RegTy.isValid() && RegTy.isVector()) + AssignedPtrType = GR->getOrCreateSPIRVVectorType( + AssignedPtrType, RegTy.getNumElements(), MIB, true); MachineInstr *Def = MRI.getVRegDef(Reg); assert(Def && "Expecting an instruction that defines the register"); // G_GLOBAL_VALUE already has type info. 
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index a71320f8f443b..4d64f3fa9db2c 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -371,6 +371,12 @@ inline bool isPointerTy(const Type *T) { return isUntypedPointerTy(T) || isTypedPointerTy(T); } +// True if T is non-null and is a fixed vector type of untyped pointers. +inline bool isUntypedPointerVectorTy(const Type *T) { + return isa_and_nonnull<FixedVectorType>(T) && + isUntypedPointerTy(T->getScalarType()); +} + // Get the address space of this pointer or pointer vector type for instances of // PointerType or TypedPointerType. inline unsigned getPointerAddressSpace(const Type *T) { diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_masked_gather_scatter/single-element-vector-gep-no-extension.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_masked_gather_scatter/single-element-vector-gep-no-extension.ll new file mode 100644 index 0000000000000..b5c4578cc252a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_masked_gather_scatter/single-element-vector-gep-no-extension.ll @@ -0,0 +1,24 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; A <1 x ptr> GEP scalarizes to a single spv_gep plus an +; insertelement/extractelement pair that folds away, so no vector-of-pointers +; SPIR-V type is materialized and SPV_INTEL_masked_gather_scatter is not +; required.
+ +; CHECK-DAG: %[[#I32:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#PTR_I32:]] = OpTypePointer CrossWorkgroup %[[#I32]] +; CHECK-NOT: OpTypeVector %[[#PTR_I32]] + +; CHECK: OpFunction +; CHECK: OpPtrAccessChain +; CHECK-NOT: OpCompositeInsert +; CHECK: OpLoad %[[#I32]] +; CHECK: OpFunctionEnd +define spir_kernel void @test_vector_gep_v1(ptr addrspace(1) %p, ptr addrspace(1) %out) { + %gep = getelementptr i32, ptr addrspace(1) %p, <1 x i64> <i64 5> ; NOTE(review): any constant index satisfies the checks above; 5 mirrors vector-of-pointers-gep.ll - confirm + %elem = extractelement <1 x ptr addrspace(1)> %gep, i32 0 + %val = load i32, ptr addrspace(1) %elem + store i32 %val, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_masked_gather_scatter/vector-of-pointers-gep-no-extension.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_masked_gather_scatter/vector-of-pointers-gep-no-extension.ll new file mode 100644 index 0000000000000..a521ae627e3f2 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_masked_gather_scatter/vector-of-pointers-gep-no-extension.ll @@ -0,0 +1,20 @@ +; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK-NOT: {{[Ii]}}ntrinsic has incorrect return type +; CHECK: error:{{.*}}Vector of pointers requires SPV_INTEL_masked_gather_scatter extension + +define spir_kernel void @test_vector_gep_v2(ptr addrspace(1) %p, ptr addrspace(1) %out) { + %gep = getelementptr i32, ptr addrspace(1) %p, <2 x i64> zeroinitializer + %elem = extractelement <2 x ptr addrspace(1)> %gep, i32 0 + %val = load i32, ptr addrspace(1) %elem + store i32 %val, ptr addrspace(1) %out + ret void +} + +define spir_kernel void @test_vector_gep_v4(ptr addrspace(1) %p, ptr addrspace(1) %out) { + %gep = getelementptr i32, ptr addrspace(1) %p, <4 x i64> zeroinitializer + %elem = extractelement <4 x ptr addrspace(1)> %gep, i32 0 + %val = load i32, ptr addrspace(1) %elem + store i32 %val, ptr addrspace(1) %out + ret void +} diff --git 
a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_masked_gather_scatter/vector-of-pointers-gep.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_masked_gather_scatter/vector-of-pointers-gep.ll new file mode 100644 index 0000000000000..2407cf3ee61cc --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_masked_gather_scatter/vector-of-pointers-gep.ll @@ -0,0 +1,111 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_masked_gather_scatter %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_masked_gather_scatter %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#I32:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#PTR:]] = OpTypePointer CrossWorkgroup %[[#I32]] +; CHECK-DAG: %[[#I64:]] = OpTypeInt 64 0 +; CHECK-DAG: %[[#ONE:]] = OpConstant %[[#I64]] 1 +; CHECK-DAG: %[[#TWO:]] = OpConstant %[[#I64]] 2 +; CHECK-DAG: %[[#FIVE:]] = OpConstant %[[#I64]] 5 +; CHECK-DAG: %[[#VPTR2:]] = OpTypeVector %[[#PTR]] 2 +; CHECK-DAG: %[[#VI64_2:]] = OpTypeVector %[[#I64]] 2 +; CHECK-DAG: %[[#UNDEF2:]] = OpUndef %[[#VPTR2]] +; CHECK-DAG: %[[#NULL2:]] = OpConstantNull %[[#VI64_2]] +; CHECK-DAG: %[[#VPTR4:]] = OpTypeVector %[[#PTR]] 4 +; CHECK-DAG: %[[#VI64_4:]] = OpTypeVector %[[#I64]] 4 +; CHECK-DAG: %[[#UNDEF4:]] = OpUndef %[[#VPTR4]] +; CHECK-DAG: %[[#NULL4:]] = OpConstantNull %[[#VI64_4]] + +; The <1 x ptr> GEP collapses to a single scalar OpPtrAccessChain; no +; vector-of-pointers value is materialized. 
+; CHECK: OpFunction +; CHECK-NEXT: %[[#P1:]] = OpFunctionParameter %[[#PTR]] +; CHECK-NEXT: %[[#OUT1:]] = OpFunctionParameter %[[#PTR]] +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[#GEP1:]] = OpPtrAccessChain %[[#PTR]] %[[#P1]] %[[#FIVE]] +; CHECK-NEXT: %[[#VAL1:]] = OpLoad %[[#I32]] %[[#GEP1]] +; CHECK-NEXT: OpStore %[[#OUT1]] %[[#VAL1]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +define spir_kernel void @test_vector_gep_v1(ptr addrspace(1) %p, ptr addrspace(1) %out) { + %gep = getelementptr i32, ptr addrspace(1) %p, <1 x i64> <i64 5> ; index 5 is the FIVE constant expected as the OpPtrAccessChain operand above + %elem = extractelement <1 x ptr addrspace(1)> %gep, i32 0 + %val = load i32, ptr addrspace(1) %elem + store i32 %val, ptr addrspace(1) %out + ret void +} + +; CHECK: OpFunction +; CHECK-NEXT: %[[#P2:]] = OpFunctionParameter %[[#PTR]] +; CHECK-NEXT: %[[#OUT2:]] = OpFunctionParameter %[[#PTR]] +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[#IDX2_0:]] = OpCompositeExtract %[[#I64]] %[[#NULL2]] 0 +; CHECK-NEXT: %[[#GEP2_0:]] = OpPtrAccessChain %[[#PTR]] %[[#P2]] %[[#IDX2_0]] +; CHECK-NEXT: %[[#INS2_0:]] = OpCompositeInsert %[[#VPTR2]] %[[#GEP2_0]] %[[#UNDEF2]] 0 +; CHECK-NEXT: %[[#IDX2_1:]] = OpCompositeExtract %[[#I64]] %[[#NULL2]] 1 +; CHECK-NEXT: %[[#GEP2_1:]] = OpPtrAccessChain %[[#PTR]] %[[#P2]] %[[#IDX2_1]] +; CHECK-NEXT: %[[#INS2_1:]] = OpCompositeInsert %[[#VPTR2]] %[[#GEP2_1]] %[[#INS2_0]] 1 +; CHECK-NEXT: %[[#ELT2:]] = OpCompositeExtract %[[#PTR]] %[[#INS2_1]] 0 +; CHECK-NEXT: %[[#VAL2:]] = OpLoad %[[#I32]] %[[#ELT2]] +; CHECK-NEXT: OpStore %[[#OUT2]] %[[#VAL2]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +define spir_kernel void @test_vector_gep_v2(ptr addrspace(1) %p, ptr addrspace(1) %out) { + %gep = getelementptr i32, ptr addrspace(1) %p, <2 x i64> zeroinitializer + %elem = extractelement <2 x ptr addrspace(1)> %gep, i32 0 + %val = load i32, ptr addrspace(1) %elem + store i32 %val, ptr addrspace(1) %out + ret void +} + +; CHECK: OpFunction +; CHECK-NEXT: %[[#P4:]] = OpFunctionParameter %[[#PTR]] +; CHECK-NEXT: 
%[[#OUT4:]] = OpFunctionParameter %[[#PTR]] +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[#IDX4_0:]] = OpCompositeExtract %[[#I64]] %[[#NULL4]] 0 +; CHECK-NEXT: %[[#GEP4_0:]] = OpPtrAccessChain %[[#PTR]] %[[#P4]] %[[#IDX4_0]] +; CHECK-NEXT: %[[#INS4_0:]] = OpCompositeInsert %[[#VPTR4]] %[[#GEP4_0]] %[[#UNDEF4]] 0 +; CHECK-NEXT: %[[#IDX4_1:]] = OpCompositeExtract %[[#I64]] %[[#NULL4]] 1 +; CHECK-NEXT: %[[#GEP4_1:]] = OpPtrAccessChain %[[#PTR]] %[[#P4]] %[[#IDX4_1]] +; CHECK-NEXT: %[[#INS4_1:]] = OpCompositeInsert %[[#VPTR4]] %[[#GEP4_1]] %[[#INS4_0]] 1 +; CHECK-NEXT: %[[#IDX4_2:]] = OpCompositeExtract %[[#I64]] %[[#NULL4]] 2 +; CHECK-NEXT: %[[#GEP4_2:]] = OpPtrAccessChain %[[#PTR]] %[[#P4]] %[[#IDX4_2]] +; CHECK-NEXT: %[[#INS4_2:]] = OpCompositeInsert %[[#VPTR4]] %[[#GEP4_2]] %[[#INS4_1]] 2 +; CHECK-NEXT: %[[#IDX4_3:]] = OpCompositeExtract %[[#I64]] %[[#NULL4]] 3 +; CHECK-NEXT: %[[#GEP4_3:]] = OpPtrAccessChain %[[#PTR]] %[[#P4]] %[[#IDX4_3]] +; CHECK-NEXT: %[[#INS4_3:]] = OpCompositeInsert %[[#VPTR4]] %[[#GEP4_3]] %[[#INS4_2]] 3 +; CHECK-NEXT: %[[#ELT4:]] = OpCompositeExtract %[[#PTR]] %[[#INS4_3]] 0 +; CHECK-NEXT: %[[#VAL4:]] = OpLoad %[[#I32]] %[[#ELT4]] +; CHECK-NEXT: OpStore %[[#OUT4]] %[[#VAL4]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +define spir_kernel void @test_vector_gep_v4(ptr addrspace(1) %p, ptr addrspace(1) %out) { + %gep = getelementptr i32, ptr addrspace(1) %p, <4 x i64> zeroinitializer ; scalar base, zero index in all four lanes + %elem = extractelement <4 x ptr addrspace(1)> %gep, i32 0 ; only lane 0 of the result is read back + %val = load i32, ptr addrspace(1) %elem + store i32 %val, ptr addrspace(1) %out + ret void +} + +; CHECK: OpFunction +; CHECK-NEXT: %[[#PV:]] = OpFunctionParameter %[[#VPTR2]] +; CHECK-NEXT: %[[#OUTV:]] = OpFunctionParameter %[[#PTR]] +; CHECK-NEXT: OpLabel +; CHECK-NEXT: %[[#EXPV_0:]] = OpCompositeExtract %[[#PTR]] %[[#PV]] 0 +; CHECK-NEXT: %[[#GEPV_0:]] = OpPtrAccessChain %[[#PTR]] %[[#EXPV_0]] %[[#ONE]] +; CHECK-NEXT: %[[#INSV_0:]] = OpCompositeInsert %[[#VPTR2]] %[[#GEPV_0]] %[[#UNDEF2]] 0 +; 
CHECK-NEXT: %[[#EXPV_1:]] = OpCompositeExtract %[[#PTR]] %[[#PV]] 1 +; CHECK-NEXT: %[[#GEPV_1:]] = OpPtrAccessChain %[[#PTR]] %[[#EXPV_1]] %[[#TWO]] +; CHECK-NEXT: %[[#INSV_1:]] = OpCompositeInsert %[[#VPTR2]] %[[#GEPV_1]] %[[#INSV_0]] 1 +; CHECK-NEXT: %[[#ELTV:]] = OpCompositeExtract %[[#PTR]] %[[#INSV_1]] 0 +; CHECK-NEXT: %[[#VALV:]] = OpLoad %[[#I32]] %[[#ELTV]] +; CHECK-NEXT: OpStore %[[#OUTV]] %[[#VALV]] +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +define spir_kernel void @test_vector_gep_vec_ptr(<2 x ptr addrspace(1)> %ptrs, ptr addrspace(1) %out) { + %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptrs, <2 x i64> <i64 1, i64 2> ; lane indices 1 and 2 are the ONE/TWO constants expected for lanes 0 and 1 above + %elem = extractelement <2 x ptr addrspace(1)> %gep, i32 0 + %val = load i32, ptr addrspace(1) %elem + store i32 %val, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/pointers/getelementptr-vector-index.ll b/llvm/test/CodeGen/SPIRV/pointers/getelementptr-vector-index.ll index 0f710f9827679..bf3fa1b8ffe35 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/getelementptr-vector-index.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/getelementptr-vector-index.ll @@ -3,15 +3,13 @@ ; CHECK-DAG: %[[#INT32:]] = OpTypeInt 32 0 ; CHECK-DAG: %[[#PTR_INT32:]] = OpTypePointer CrossWorkgroup %[[#INT32]] -; CHECK-DAG: %[[#INT8:]] = OpTypeInt 8 0 -; CHECK-DAG: %[[#PTR_INT8:]] = OpTypePointer CrossWorkgroup %[[#INT8]] ; CHECK-DAG: %[[#INT64:]] = OpTypeInt 64 0 ; CHECK-DAG: %[[#CONST_0:]] = OpConstantNull %[[#INT64]] ; CHECK-LABEL: Begin function test_vector_gep_with_load -; CHECK: %[[#BC1:]] = OpBitcast %[[#PTR_INT8]] %[[#]] -; CHECK: %[[#GEP:]] = OpPtrAccessChain %[[#PTR_INT8]] %[[#BC1]] %[[#CONST_0]] -; CHECK: %[[#BC2:]] = OpBitcast %[[#PTR_INT32]] %[[#GEP]] -; CHECK: %[[#VAL:]] = OpLoad %[[#INT32]] %[[#BC2]] +; CHECK-NOT: OpBitcast +; CHECK: %[[#GEP:]] = OpPtrAccessChain %[[#PTR_INT32]] %[[#]] %[[#CONST_0]] +; CHECK-NOT: OpBitcast +; CHECK: %[[#VAL:]] = OpLoad %[[#INT32]] %[[#GEP]] ; CHECK: OpStore %[[#]] %[[#VAL]] ; CHECK: OpFunctionEnd 
define spir_kernel void @test_vector_gep_with_load(ptr addrspace(1) %p, ptr addrspace(1) %out) {