diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 2e9602d1b3793..2791ae8a4bb95 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -1532,11 +1532,12 @@ class HLSLBufferCopyEmitter { bool emitCopy(QualType CType) { LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType); - // TODO: We should be able to fall back to a regular memcpy if the layout - // type doesn't have any padding, but that runs into issues in the backend - // currently. - // - // See https://github.com/llvm/wg-hlsl/issues/351 + // If the layout type matches the original type, we can just fall back to a + // regular memcpy. + llvm::Type *OrigTy = CGF.CGM.getTypes().ConvertTypeForMem(CType); + if (LayoutTy == OrigTy) + return false; + emitCopyAtIndices(LayoutTy, llvm::ConstantInt::get(CGF.SizeTy, 0), llvm::ConstantInt::get(CGF.SizeTy, 0)); return true; diff --git a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl index d1bfc6db8b504..adb2ca00faac0 100644 --- a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl +++ b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl @@ -153,17 +153,11 @@ void arr_assign8() { C = c1; } -// TODO: We should be able to just memcpy here. -// See https://github.com/llvm/wg-hlsl/issues/351 +// Since everything is aligned on 16-byte boundaries, the layout has no padding and we just get a memcpy. 
// // CHECK-LABEL: define hidden void {{.*}}arr_assign9 // CHECK: [[C:%.*]] = alloca [2 x <4 x i32>], align 16 -// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0 -// CHECK-NEXT: [[L0:%.*]] = load <4 x i32>, ptr addrspace(2) @c2, align 16 -// CHECK-NEXT: store <4 x i32> [[L0]], ptr [[V0]], align 16 -// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0, i32 1 -// CHECK-NEXT: [[L1:%.*]] = load <4 x i32>, ptr addrspace(2) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(2) @c2, i32 0, i32 1), align 16 -// CHECK-NEXT: store <4 x i32> [[L1]], ptr [[V1]], align 16 +// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 16 [[C]], ptr addrspace(2) align 16 @c2, i32 32, i1 false) // CHECK-NEXT: ret void void arr_assign9() { int4 C[2];