Skip to content

Commit 0f379b8

Browse files
committed
UCP: Added MIN_RMA_CHUNK_SIZE
1 parent 7ec95b9 commit 0f379b8

File tree

6 files changed

+29
-11
lines changed

6 files changed

+29
-11
lines changed

src/ucp/core/ucp_context.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,11 @@ static ucs_config_field_t ucp_context_config_table[] = {
239239
"multiple rails. Must be greater than 0.",
240240
ucs_offsetof(ucp_context_config_t, min_rndv_chunk_size), UCS_CONFIG_TYPE_MEMUNITS},
241241

242+
{"MIN_RMA_CHUNK_SIZE", "16k",
243+
"Minimum chunk size to split the message sent with RMA protocol on\n"
244+
"multiple rails. Must be greater than 0.",
245+
ucs_offsetof(ucp_context_config_t, min_rma_chunk_size), UCS_CONFIG_TYPE_MEMUNITS},
246+
242247
{"RMA_ZCOPY_MAX_SEG_SIZE", "auto",
243248
"Max size of a segment for rma/rndv zcopy.",
244249
ucs_offsetof(ucp_context_config_t, rma_zcopy_max_seg_size), UCS_CONFIG_TYPE_MEMUNITS},
@@ -2127,6 +2132,12 @@ static ucs_status_t ucp_fill_config(ucp_context_h context,
21272132
return UCS_ERR_INVALID_PARAM;
21282133
}
21292134

2135+
if (context->config.ext.min_rma_chunk_size == 0) {
2136+
ucs_error("minimum chunk size for RMA protocol must be greater"
2137+
" than 0");
2138+
return UCS_ERR_INVALID_PARAM;
2139+
}
2140+
21302141
/* Save environment prefix to later notify user for unused variables */
21312142
context->config.env_prefix = ucs_strdup(config->env_prefix, "ucp config");
21322143
if (context->config.env_prefix == NULL) {

src/ucp/core/ucp_context.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ typedef struct ucp_context_config {
127127
/** Minimum allowed chunk size when splitting rndv message over multiple
128128
* lanes */
129129
size_t min_rndv_chunk_size;
130+
/** Minimum allowed chunk size when splitting rma message over multiple
131+
* lanes */
132+
size_t min_rma_chunk_size;
130133
/** Estimated number of endpoints */
131134
size_t estimated_num_eps;
132135
/** Estimated number of processes per node */

src/ucp/proto/proto_multi.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,9 +311,13 @@ ucs_status_t ucp_proto_multi_init(const ucp_proto_multi_init_params_t *params,
311311
/* Make sure fragment is not zero */
312312
ucs_assert(max_frag > 0);
313313

314-
/* Min chunk is scaled, but must be within HW limits */
315-
min_chunk = ucs_min(lane_perf->bandwidth * params->min_chunk /
316-
min_bandwidth, lane_perf->max_frag);
314+
/* Min chunk is scaled, but must be within HW limits.
315+
Min chunk cannot be less than UCP_MIN_BCOPY, as it is not worth
316+
splitting tiny messages. */
317+
min_chunk = ucs_min(lane_perf->max_frag,
318+
ucs_max(UCP_MIN_BCOPY,
319+
lane_perf->bandwidth *
320+
params->min_chunk / min_bandwidth));
317321
max_frag = ucs_max(max_frag, min_chunk);
318322
lpriv->max_frag = max_frag;
319323
perf.max_frag += max_frag;

src/ucp/proto/proto_multi.inl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ ucp_proto_multi_max_payload(ucp_request_t *req,
6363
const ucp_proto_multi_lane_priv_t *lpriv,
6464
size_t hdr_size)
6565
{
66-
size_t length = req->send.state.dt_iter.length;
66+
size_t length = req->send.state.dt_iter.length;
67+
size_t offset = length - req->send.state.dt_iter.offset;
6768
size_t max_frag;
6869
size_t max_payload;
6970

@@ -77,16 +78,15 @@ ucp_proto_multi_max_payload(ucp_request_t *req,
7778

7879
/* Do not split very small sends to chunks, it's not worth it, and
7980
generic datatype may not be able to pack to a smaller buffer */
80-
if (length < UCP_MIN_BCOPY) {
81+
if (offset < lpriv->min_end_offset) {
8182
return max_frag;
8283
}
8384

8485
max_payload = ucs_min(ucp_proto_multi_scaled_length(lpriv->weight, length),
8586
max_frag);
8687
ucs_assertv(max_payload > 0,
8788
"length=%zu weight=%zu%% lpriv->max_frag=%zu hdr_size=%zu",
88-
req->send.state.dt_iter.length,
89-
ucp_proto_multi_scaled_length(lpriv->weight, 100),
89+
length, ucp_proto_multi_scaled_length(lpriv->weight, 100),
9090
lpriv->max_frag, hdr_size);
9191
return max_payload;
9292
}

src/ucp/rma/get_offload.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ ucp_proto_get_offload_bcopy_probe(const ucp_proto_init_params_t *init_params)
109109
.super.exclude_map = 0,
110110
.super.reg_mem_info = ucp_mem_info_unknown,
111111
.max_lanes = UCP_PROTO_RMA_MAX_BCOPY_LANES,
112-
.min_chunk = 0,
112+
.min_chunk = context->config.ext.min_rma_chunk_size,
113113
.initial_reg_md_map = 0,
114114
.first.tl_cap_flags = UCT_IFACE_FLAG_GET_BCOPY,
115115
.first.lane_type = UCP_LANE_TYPE_RMA_BW,
@@ -219,7 +219,7 @@ ucp_proto_get_offload_zcopy_probe(const ucp_proto_init_params_t *init_params)
219219
.super.reg_mem_info = ucp_proto_common_select_param_mem_info(
220220
init_params->select_param),
221221
.max_lanes = context->config.ext.max_rma_lanes,
222-
.min_chunk = 0,
222+
.min_chunk = context->config.ext.min_rma_chunk_size,
223223
.initial_reg_md_map = 0,
224224
.first.tl_cap_flags = UCT_IFACE_FLAG_GET_ZCOPY,
225225
.first.lane_type = UCP_LANE_TYPE_RMA_BW,

src/ucp/rma/put_offload.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ ucp_proto_put_offload_bcopy_probe(const ucp_proto_init_params_t *init_params)
189189
.super.exclude_map = 0,
190190
.super.reg_mem_info = ucp_mem_info_unknown,
191191
.max_lanes = UCP_PROTO_RMA_MAX_BCOPY_LANES,
192-
.min_chunk = 0,
192+
.min_chunk = context->config.ext.min_rma_chunk_size,
193193
.initial_reg_md_map = 0,
194194
.first.tl_cap_flags = UCT_IFACE_FLAG_PUT_BCOPY,
195195
.first.lane_type = UCP_LANE_TYPE_RMA_BW,
@@ -280,7 +280,7 @@ ucp_proto_put_offload_zcopy_probe(const ucp_proto_init_params_t *init_params)
280280
.super.reg_mem_info = ucp_proto_common_select_param_mem_info(
281281
init_params->select_param),
282282
.max_lanes = context->config.ext.max_rma_lanes,
283-
.min_chunk = 0,
283+
.min_chunk = context->config.ext.min_rma_chunk_size,
284284
.initial_reg_md_map = 0,
285285
.first.tl_cap_flags = UCT_IFACE_FLAG_PUT_ZCOPY,
286286
.first.lane_type = UCP_LANE_TYPE_RMA_BW,

0 commit comments

Comments
 (0)