Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/libtorchaudio/cuctc/src/ctc_prefix_decoder_kernel_v2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <float.h>
#include <algorithm>
#include <limits>
#include "../include/ctc_prefix_decoder_host.h"
#include "ctc_fast_divmod.cuh"
#include "cub/cub.cuh"
Expand Down Expand Up @@ -440,7 +441,11 @@ __launch_bounds__(BLOCK_SIZE) void topk_reduce_and_copy_list_per_batch_kernel(
topk_values,
beam,
items_per_batch,
#if CUDART_VERSION >= 12090 // CUDA 12.9 and later
std::numeric_limits<float>::lowest(),
#else
cub::FpLimits<float>::Lowest(),
#endif
block_topk_fun,
set_key_value);

Expand Down
4 changes: 4 additions & 0 deletions src/libtorchaudio/forced_align/gpu/compute.cu
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,11 @@ __global__ void falign_cuda_step_kernel(
alphas_a[curIdxOffset][i] = result + logProbs_a[batchIndex][t][labelIdx];
threadMax = max(threadMax, alphas_a[curIdxOffset][i]);
}
#if CUDART_VERSION >= 12090 // CUDA 12.9 and later
scalar_t maxResult = BlockReduce(tempStorage).Reduce(threadMax, thrust::maximum<scalar_t>());
#else
scalar_t maxResult = BlockReduce(tempStorage).Reduce(threadMax, cub::Max());
#endif
if (threadIdx.x == 0) {
maxValue = maxResult;
}
Expand Down
Loading