From 557294eb774ee0bda1817fc8249cae62e916eafa Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Tue, 16 Mar 2021 15:59:21 -0700 Subject: [PATCH] cuda: fix windowing test with cuda Fixes #422. This commit ensures that the allocation clearing logic is applied to the CUDA backend as well. The windowing test caught this because TACO was automatically parallelizing the loop onto the GPU. --- src/codegen/codegen_cuda.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/codegen/codegen_cuda.cpp b/src/codegen/codegen_cuda.cpp index 971410b98..77cf0cd88 100644 --- a/src/codegen/codegen_cuda.cpp +++ b/src/codegen/codegen_cuda.cpp @@ -1096,6 +1096,20 @@ void CodeGen_CUDA::visit(const Allocate* op) { op->num_elements.accept(this); parentPrecedence = TOP; stream << "));" << endl; + // If the operation wants the input cleared, then memset it to zero. + if (op->clear) { + doIndent(); + stream << "gpuErrchk(cudaMemset("; + op->var.accept(this); + stream << variable_name; + stream << ", 0, "; + stream << "sizeof(" << elementType << ")"; + stream << " * "; + parentPrecedence = MUL; + op->num_elements.accept(this); + parentPrecedence = TOP; + stream << "));" << endl; + } if(op->is_realloc) { doIndent();