; LLVM IR module for the amdgcn--amdpal target. It defines one compute-shader
; entry point, @_amdgpu_cs_main, and declares a set of intrinsics with their
; attribute groups.
;
; Layout note: every top-level entity sits on its own line. A ';' comment runs
; to the end of the physical line, so a "; Function Attrs:" comment must never
; share a line with the declaration that follows it.
;
; NOTE(review): this looks like an automatically reduced test case (unused
; values, unused declarations) -- confirm before pruning anything.

; Data layout string. Among other things it gives per-address-space pointer
; sizes (e.g. p7 is 160 bits and p9 is 192 bits, both with a 32-bit index;
; p8 is 128 bits), selects address space 5 for allocas (A5) and address
; space 1 as the default for globals (G1), and marks address spaces 7, 8
; and 9 as non-integral (ni:7:8:9).
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn--amdpal"

; Packed struct wrapping a single [4 x float]; used as the element type of
; the array fields in the struct type indexed by %3 below.
%llpc.matrix.column = type <{ [4 x float] }>

; External globals. Neither is referenced by the function visible here.
@__llpc_global_proxy_ = external addrspace(4) constant [5 x <3 x float>]
@llvmir = external constant [316420 x i8]

; Entry point using the amdgpu_cs calling convention.
;
; %WorkgroupId is not read. None of the values computed in the body
; (%0 through %6 and %bc4467) is used by a later instruction; the function
; simply returns void.
define amdgpu_cs void @_amdgpu_cs_main(<3 x i32> %WorkgroupId) {
.entry:
  ; Integer zero converted to an addrspace(8) pointer and to an addrspace(4) pointer.
  %0 = inttoptr i128 0 to ptr addrspace(8)
  %1 = inttoptr i64 0 to ptr addrspace(4)
  ; Address of element 2 of the [4 x i32] field, based at a null addrspace(7) pointer.
  %2 = getelementptr <{ [4 x i32] }>, ptr addrspace(7) null, i32 0, i32 0, i32 2
  ; Address of element 3 of struct field 25 (a [4 x float]), based at a null addrspace(7) pointer.
  %3 = getelementptr <{ [4 x %llpc.matrix.column], [4 x %llpc.matrix.column], [4 x %llpc.matrix.column], [4 x float], [4 x float], [4 x float], [4 x float], [4 x float], [4 x i32], [4 x float], [4 x %llpc.matrix.column], <{ [4 x i32], [4 x i32] }>, [5 x [4 x %llpc.matrix.column]], [5 x [4 x float]], [16 x [4 x %llpc.matrix.column]], [4 x float], [4 x float], [4 x float], [4 x float], [4 x float], [4 x float], [4 x float], [4 x float], [4 x float], [4 x float], [4 x float], [4 x float], [4 x float] }>, ptr addrspace(7) null, i32 0, i32 25, i32 3
  ; 4-byte-aligned load of <4 x i8> through a null addrspace(7) pointer.
  %4 = load <4 x i8>, ptr addrspace(7) null, align 4
  %5 = trunc i32 0 to i16
  ; Reinterpret a zero <4 x i32> as <8 x half>.
  %bc4467 = bitcast <4 x i32> zeroinitializer to <8 x half>
  ; Note: the extract operates on a zeroinitializer constant, not on %bc4467.
  %6 = extractelement <8 x half> zeroinitializer, i64 0
  ret void
}

; Intrinsic declarations. None of them is called by the function visible here.

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.fabs.f32(float) #0

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.umax.i32(i32, i32) #0

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare ptr @llvm.invariant.start.p7(i64 immarg, ptr addrspace(7) nocapture) #1

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.amdgcn.s.getpc() #0

; Function Attrs: convergent nounwind
declare <8 x i32> @llvm.amdgcn.waterfall.readfirstlane.v8i32.v8i32(i32, <8 x i32>) #2

; Function Attrs: convergent nounwind
declare <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32, <4 x float>) #2

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.amdgcn.cubesc(float, float, float) #0

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.pow.f32(float, float) #0

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare i32 @llvm.amdgcn.image.sample.lz.2d.i32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <3 x float> @llvm.amdgcn.image.sample.l.cube.v3f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare float @llvm.amdgcn.image.sample.c.lz.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.powi.f32.i32(float, i32) #0

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.amdgcn.image.store.2d.v3f32.i32(<3 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #4

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <2 x i32> @llvm.amdgcn.image.load.2d.v2i32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3

; Attribute groups referenced by the declarations above.
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { convergent nounwind }
attributes #3 = { nocallback nofree nosync nounwind willreturn memory(read) }
attributes #4 = { nocallback nofree nosync nounwind willreturn memory(write) }