Skip to content

Commit

Permalink
Replace cuio macros with constexpr and inline functions (#6782)
Browse files Browse the repository at this point in the history
Fixes #521 - Replaces macros with constexpr variables and inline functions wherever applicable.
  • Loading branch information
kaatish committed Nov 29, 2020
1 parent f0f53c7 commit 1771a8f
Show file tree
Hide file tree
Showing 27 changed files with 729 additions and 717 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@
- PR #6653 Replaced SHFL_XOR calls with cub::WarpReduce
- PR #6751 Rework ColumnViewAccess and its usage
- PR #6698 Remove macros from ORC reader and writer
- PR #6782 Replace cuio macros with constexpr and inline functions

## Bug Fixes

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/io/avro/avro.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@ std::string container::get_encoded()
*/
bool container::parse(file_metadata *md, size_t max_num_rows, size_t first_row)
{
constexpr uint32_t avro_magic = (('O' << 0) | ('b' << 8) | ('j' << 16) | (0x01 << 24));
uint32_t sig4, max_block_size;
size_t total_object_count;

sig4 = get_raw<uint8_t>();
sig4 |= get_raw<uint8_t>() << 8;
sig4 |= get_raw<uint8_t>() << 16;
sig4 |= get_raw<uint8_t>() << 24;
if (sig4 != AVRO_MAGIC) { return false; }
if (sig4 != avro_magic) { return false; }
for (;;) {
uint32_t num_md_items = static_cast<uint32_t>(get_encoded<int64_t>());
if (num_md_items == 0) { break; }
Expand Down
1 change: 0 additions & 1 deletion cpp/src/io/avro/avro.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
namespace cudf {
namespace io {
namespace avro {
#define AVRO_MAGIC (('O' << 0) | ('b' << 8) | ('j' << 16) | (0x01 << 24))

/**
* @Brief AVRO schema entry
Expand Down
30 changes: 15 additions & 15 deletions cpp/src/io/avro/avro_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ namespace cudf {
namespace io {
namespace avro {
namespace gpu {
#define NWARPS 16
#define MAX_SHARED_SCHEMA_LEN 1000
constexpr int num_warps = 16;
constexpr int max_shared_schema_len = 1000;

/*
* Avro varint encoding - see
Expand Down Expand Up @@ -228,8 +228,8 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema,
* @param[in] first_row Crop all rows below first_row
*
**/
// blockDim {32,NWARPS,1}
extern "C" __global__ void __launch_bounds__(NWARPS * 32, 2)
// blockDim {32,num_warps,1}
extern "C" __global__ void __launch_bounds__(num_warps * 32, 2)
gpuDecodeAvroColumnData(block_desc_s *blocks,
schemadesc_s *schema_g,
device_span<nvstrdesc_s> global_dictionary,
Expand All @@ -240,19 +240,19 @@ extern "C" __global__ void __launch_bounds__(NWARPS * 32, 2)
size_t max_rows,
size_t first_row)
{
__shared__ __align__(8) schemadesc_s g_shared_schema[MAX_SHARED_SCHEMA_LEN];
__shared__ __align__(8) block_desc_s blk_g[NWARPS];
__shared__ __align__(8) schemadesc_s g_shared_schema[max_shared_schema_len];
__shared__ __align__(8) block_desc_s blk_g[num_warps];

schemadesc_s *schema;
block_desc_s *const blk = &blk_g[threadIdx.y];
uint32_t block_id = blockIdx.x * NWARPS + threadIdx.y;
uint32_t block_id = blockIdx.x * num_warps + threadIdx.y;
size_t cur_row;
uint32_t rows_remaining;
const uint8_t *cur, *end;

// Fetch schema into shared mem if possible
if (schema_len <= MAX_SHARED_SCHEMA_LEN) {
for (int i = threadIdx.y * 32 + threadIdx.x; i < schema_len; i += NWARPS * 32) {
if (schema_len <= max_shared_schema_len) {
for (int i = threadIdx.y * 32 + threadIdx.x; i < schema_len; i += num_warps * 32) {
g_shared_schema[i] = schema_g[i];
}
__syncthreads();
Expand Down Expand Up @@ -289,11 +289,11 @@ extern "C" __global__ void __launch_bounds__(NWARPS * 32, 2)
global_dictionary);
}
if (nrows <= 1) {
cur = start + SHFL0(static_cast<uint32_t>(cur - start));
cur = start + shuffle(static_cast<uint32_t>(cur - start));
} else {
cur = start + nrows * min_row_size;
}
SYNCWARP();
__syncwarp();
cur_row += nrows;
rows_remaining -= nrows;
}
Expand Down Expand Up @@ -324,10 +324,10 @@ void DecodeAvroColumnData(block_desc_s *blocks,
uint32_t min_row_size,
rmm::cuda_stream_view stream)
{
// NWARPS warps per threadblock
dim3 const dim_block(32, NWARPS);
// 1 warp per datablock, NWARPS datablocks per threadblock
dim3 const dim_grid((num_blocks + NWARPS - 1) / NWARPS, 1);
// num_warps warps per threadblock
dim3 const dim_block(32, num_warps);
// 1 warp per datablock, num_warps datablocks per threadblock
dim3 const dim_grid((num_blocks + num_warps - 1) / num_warps, 1);

gpuDecodeAvroColumnData<<<dim_grid, dim_block, 0, stream.value()>>>(blocks,
schema,
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/comp/brotli_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ struct brotli_dictionary_s {
uint8_t data[122784];
};

#define BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
#define BROTLI_MAX_DICTIONARY_WORD_LENGTH 24
constexpr int brotli_min_dictionary_word_length = 4;
constexpr int brotli_max_dictionary_word_length = 24;

const brotli_dictionary_s *get_brotli_dictionary(void);

Expand Down
40 changes: 10 additions & 30 deletions cpp/src/io/comp/brotli_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ THE SOFTWARE.
#define CONSTANT static const
#endif

#define BROTLI_NUM_BLOCK_LEN_SYMBOLS 26
#define BROTLI_NUM_LITERAL_SYMBOLS 256
#define BROTLI_NUM_COMMAND_SYMBOLS 704
constexpr int brotli_num_block_len_symbols = 26;
constexpr int brotli_num_literal_symbols = 256;
constexpr int brotli_num_command_symbols = 704;

CONSTANT uint8_t kReverseBits[1 << 8] = {
0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
Expand Down Expand Up @@ -2149,14 +2149,6 @@ CONSTANT uint8_t kContextLookup[2048] = {
7,
};

/* typeof(MODE) == ContextType; returns ContextLut */
#define BROTLI_CONTEXT_LUT(MODE) ((MODE) << 9)

/* typeof(LUT) == const uint8_t* */
#define BROTLI_NEED_CONTEXT_LUT(MODE) ((MODE) < (4 << 9))
#define BROTLI_CONTEXT(P1, P2, LUT) \
(kContextLookup[(LUT) + (P1)] | kContextLookup[(LUT) + (P2) + 256])

typedef struct CmdLutElement {
uint8_t insert_len_extra_bits;
uint8_t copy_len_extra_bits;
Expand All @@ -2166,7 +2158,7 @@ typedef struct CmdLutElement {
uint16_t copy_len_offset;
} CmdLutElement;

CONSTANT CmdLutElement kCmdLut[BROTLI_NUM_COMMAND_SYMBOLS] = {
CONSTANT CmdLutElement kCmdLut[brotli_num_command_symbols] = {
{0x00, 0x00, 0, 0x00, 0x0000, 0x0002}, {0x00, 0x00, 0, 0x01, 0x0000, 0x0003},
{0x00, 0x00, 0, 0x02, 0x0000, 0x0004}, {0x00, 0x00, 0, 0x03, 0x0000, 0x0005},
{0x00, 0x00, 0, 0x03, 0x0000, 0x0006}, {0x00, 0x00, 0, 0x03, 0x0000, 0x0007},
Expand Down Expand Up @@ -2526,10 +2518,10 @@ CONSTANT uint8_t kCodeLengthPrefixLength[16] = {2, 2, 2, 3, 2, 2, 2, 4, 2, 2, 2,
CONSTANT uint8_t kCodeLengthPrefixValue[16] = {0, 4, 3, 2, 0, 4, 3, 1, 0, 4, 3, 2, 0, 4, 3, 5};

// Represents the range of values belonging to a prefix code: [offset, offset + 2^nbits)
CONSTANT uint16_t kBlockLengthPrefixCodeOffset[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
CONSTANT uint16_t kBlockLengthPrefixCodeOffset[brotli_num_block_len_symbols] = {
1, 5, 9, 13, 17, 25, 33, 41, 49, 65, 81, 97, 113,
145, 177, 209, 241, 305, 369, 497, 753, 1265, 2289, 4337, 8433, 16625};
CONSTANT uint8_t kBlockLengthPrefixCodeBits[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
CONSTANT uint8_t kBlockLengthPrefixCodeBits[brotli_num_block_len_symbols] = {
2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24};

// Maximum possible Huffman table size for an alphabet size of (index * 32),
Expand All @@ -2538,12 +2530,11 @@ CONSTANT uint16_t kMaxHuffmanTableSize[] = {
256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758,
790, 822, 854, 886, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176,
1208, 1240, 1272, 1304, 1336, 1368, 1400, 1432, 1464, 1496, 1528};
// BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26
#define BROTLI_HUFFMAN_MAX_SIZE_26 396
// BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258
#define BROTLI_HUFFMAN_MAX_SIZE_258 632

constexpr int brotli_huffman_max_size_26 = 396;
constexpr int brotli_huffman_max_size_258 = 632;
// Max table size for context map
#define BROTLI_HUFFMAN_MAX_SIZE_272 646
constexpr int brotli_huffman_max_size_272 = 646;

enum brotli_transform_type_e {
BROTLI_TRANSFORM_IDENTITY = 0,
Expand All @@ -2570,8 +2561,6 @@ enum brotli_transform_type_e {
BROTLI_NUM_TRANSFORM_TYPES // Counts transforms, not a transform itself.
};

#define BROTLI_TRANSFORMS_MAX_CUT_OFF BROTLI_TRANSFORM_OMIT_LAST_9

/* RFC 7932 transforms string data */
CONSTANT uint8_t kPrefixSuffix[217] = {
0x01, 0x20, 0x02, 0x2C, 0x20, 0x08, 0x20, 0x6F, 0x66, 0x20, 0x74, 0x68, 0x65, 0x20, 0x04, 0x20,
Expand Down Expand Up @@ -2662,12 +2651,3 @@ CONSTANT uint8_t kTransformsData[] = {
};

CONSTANT int kNumTransforms = (int)(sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])));

/* result is uint8_t. */
#define BROTLI_TRANSFORM_PREFIX_ID(I) (kTransformsData[((I)*3) + 0])
#define BROTLI_TRANSFORM_TYPE(I) (kTransformsData[((I)*3) + 1])
#define BROTLI_TRANSFORM_SUFFIX_ID(I) (kTransformsData[((I)*3) + 2])

/* result is const uint8_t*. */
#define BROTLI_TRANSFORM_PREFIX(I) (&kPrefixSuffix[kPrefixSuffixMap[BROTLI_TRANSFORM_PREFIX_ID(I)]])
#define BROTLI_TRANSFORM_SUFFIX(I) (&kPrefixSuffix[kPrefixSuffixMap[BROTLI_TRANSFORM_SUFFIX_ID(I)]])
Loading

0 comments on commit 1771a8f

Please sign in to comment.