Switch comments from GPU dialect terms to CUDA terms (NFC).

Terminology mapping: local workgroup -> block, subgroup -> warp, invocation -> thread.

PiperOrigin-RevId: 271946342
llvm · Sep 30, 2019 · 3d9679b · 3d9679b
1 parent e5a4318
commit 3d9679b
Showing 1 changed file with 7 additions and 8 deletions.
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -119,12 +119,11 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
   }
 
 private:
-  // Creates an all_reduce across the local workgroup.
+  // Creates an all_reduce across the block.
   //
-  // First reduce the elements within a subgroup (i.e. warp). The first
-  // invocation of each subgroup writes the intermediate result to shared
-  // memory. After synchronizing the local workgroup, each subgroup reduces all
-  // values from shared memory.
+  // First reduce the elements within a warp. The first thread of each warp
+  // writes the intermediate result to shared memory. After synchronizing the
+  // block, each warp reduces all values from shared memory.
   //
   //     %warp_reduce = ... (see createWarpReduce)
   //     %buffer = llvm.mlir.addressof @reduce_buffer : !llvm<"[32 x float]*">
@@ -188,7 +187,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
     return result;
   }
 
-  // Creates an all_reduce across the subgroup. Creates a preamble
+  // Creates an all_reduce across the warp. Creates a preamble
   //
   //     %active_mask = llvm.mlir.constant(-1 : i32) : !llvm.i32
   //     %mask_and_clamp = llvm.mlir.constant(31 : i32) : !llvm.i32
@@ -200,7 +199,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
   //        %active_mask, %operand, %offset, %mask_and_clamp : !llvm.float
   //     %operand = llvm.fadd %operand, %value : !llvm.float
   //
-  // Each invocation returns the same result.
+  // Each thread returns the same result.
   //
   // Note: this currently only supports reducing exactly 32 values.
   Value *createWarpReduce(Location loc, Value *operand,
@@ -245,7 +244,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
     return rewriter.create<LLVM::AddressOfOp>(loc, globalOp);
   }
 
-  // Returns the index of the subgroup within the local workgroup.
+  // Returns the index of the warp within the block.
   //
   //     %warp_size = llvm.mlir.constant(32 : i32) : !llvm.i32
   //     %thread_idx = nvvm.read.ptx.sreg.tid.x  : !llvm.i32