Skip to content

Commit

Permalink
Fix for non-virtual memory with CXI
Browse files (browse the repository at this point in the history)
Note: Not yet fixed for speculative preload mode
  • Loading branch information
Franz Poeschel committed Nov 17, 2023
1 parent 2affe29 commit 14ffd0d
Showing 1 changed file with 38 additions and 21 deletions.
59 changes: 38 additions & 21 deletions source/adios2/toolkit/sst/dp/rdma_dp.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -58,23 +58,28 @@ pthread_mutex_t wsr_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t ts_mutex = PTHREAD_MUTEX_INITIALIZER;

int sst_fi_mr_reg(struct fid_domain *domain, const void *buf, size_t len,
uint64_t access, uint64_t offset, uint64_t requested_key,
uint64_t acs, uint64_t offset, uint64_t requested_key,
uint64_t flags, struct fid_mr **mr, void *context,
struct fid_ep *signal)
struct fid_ep *endpoint, int mr_mode)
{
int res = fi_mr_reg(domain, buf, len, access, offset, requested_key, flags,
mr, context);
int res = fi_mr_reg(domain, buf, len, acs, offset, requested_key, flags, mr,
context);
bool is_mr_endpoint = (mr_mode & FI_MR_ENDPOINT) != 0;
if (!is_mr_endpoint)
{
return res;
}
if (res != FI_SUCCESS)
{
printf("fi_mr_reg failed with %ul (%s)\n", res, fi_strerror(res));
return res;
}

/*
* When the domain_attr->mr_mode includes FI_MR_ENDPOINT, the memory region
* When the domain_attr->mr_mode includes FI_MR_ENDPOINT, the memory region
* needs to be bound to the endpoint and explicitly enabled after that.
*/
res = fi_mr_bind(*mr, &signal->fid, 0);
res = fi_mr_bind(*mr, &endpoint->fid, 0);
if (res != FI_SUCCESS)
{
printf("fi_mr_bind failed with %ul (%s)\n", res, fi_strerror(res));
Expand Down Expand Up @@ -106,6 +111,7 @@ struct fabric_state
struct fi_info *info;
// struct fi_info *linfo;
int local_mr_req;
int mr_virt_addr;
int rx_cq_data;
size_t addr_len;
size_t msg_prefix_size;
Expand Down Expand Up @@ -336,13 +342,16 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,

/*
* FI_MR_ALLOCATED and FI_MR_ENDPOINT are required for the CXI provider.
* FI_MR_VIRT_ADDR, FI_MR_LOCAL and FI_MR_PROV_KEY are for compatibility
* FI_MR_LOCAL and FI_MR_PROV_KEY are for compatibility
* with the rest of the legacy SST-libfabric implementation (where mr_mode
* used to be FI_MR_BASIC which is equivalent to FI_MR_VIRT_ADDR |
* FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_LOCAL)
*/
info->domain_attr->mr_mode = FI_MR_ALLOCATED | FI_MR_ENDPOINT |
FI_MR_VIRT_ADDR | FI_MR_PROV_KEY | FI_MR_LOCAL;
info->domain_attr->mr_mode =
FI_MR_ALLOCATED | FI_MR_ENDPOINT | FI_MR_PROV_KEY | FI_MR_LOCAL;

fabric->mr_virt_addr = 0;

#ifdef SST_HAVE_CRAY_DRC
if (strstr(info->fabric_attr->prov_name, "gni") && fabric->auth_key)
{
Expand Down Expand Up @@ -1183,7 +1192,7 @@ static DP_WSR_Stream RdmaInitWriterPerReader(CP_Services Svcs,
sst_fi_mr_reg(Fabric->domain, ReaderRollHandle->Block,
readerCohortSize * sizeof(struct _RdmaBuffer),
FI_REMOTE_WRITE, 0, 0, 0,
&WSR_Stream->rrmr, Fabric->ctx, Fabric->signal);
&WSR_Stream->rrmr, Fabric->ctx, Fabric->signal, Fabric->info->domain_attr->mr_mode);
ReaderRollHandle->Key = fi_mr_key(WSR_Stream->rrmr);

WSR_Stream->WriterContactInfo = ContactInfo;
Expand Down Expand Up @@ -1336,11 +1345,19 @@ static ssize_t PostRead(CP_Services Svcs, Rdma_RS_Stream RS_Stream, int Rank,
// register dest buffer
sst_fi_mr_reg(Fabric->domain, Buffer, Length, FI_READ, 0,
0, 0, &ret->LocalMR, Fabric->ctx,
Fabric->signal);
Fabric->signal, Fabric->info->domain_attr->mr_mode);
LocalDesc = fi_mr_desc(ret->LocalMR);
}

Addr = Info->Block + Offset;
if (Fabric->mr_virt_addr)
{
Addr = Info->Block + Offset;
}
else
{
Addr = NULL;
Addr += Offset;
}

Svcs->verbose(
RS_Stream->CP_Stream, DPTraceVerbose,
Expand Down Expand Up @@ -1687,7 +1704,7 @@ static void RdmaProvideTimestep(CP_Services Svcs, DP_WS_Stream Stream_v,

sst_fi_mr_reg(Fabric->domain, Data->block, Data->DataSize,
FI_WRITE | FI_REMOTE_READ, 0, 0, 0,
&Entry->mr, Fabric->ctx, Fabric->signal);
&Entry->mr, Fabric->ctx, Fabric->signal, Fabric->info->domain_attr->mr_mode);
Entry->Key = fi_mr_key(Entry->mr);
if (Fabric->local_mr_req)
{
Expand Down Expand Up @@ -2201,16 +2218,16 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep)
sst_fi_mr_reg(Fabric->domain, PreloadBuffer->Handle.Block,
PreloadBuffer->BufferLen, FI_REMOTE_WRITE, 0,
0, 0, &Stream->pbmr, Fabric->ctx,
Fabric->signal);
Fabric->signal, Fabric->info->domain_attr->mr_mode);
PreloadKey = fi_mr_key(Stream->pbmr);

SBSize = sizeof(*SendBuffer) * StepLog->WRanks;
SendBuffer = malloc(SBSize);
if (Fabric->local_mr_req)
{
sst_fi_mr_reg(Fabric->domain, SendBuffer, SBSize, FI_WRITE, 0,
0, 0, &sbmr, Fabric->ctx,
Fabric->signal);
sst_fi_mr_reg(Fabric->domain, SendBuffer, SBSize, FI_WRITE, 0, 0, 0,
&sbmr, Fabric->ctx, Fabric->signal,
Fabric->info->domain_attr->mr_mode);
sbdesc = fi_mr_desc(sbmr);
}

Expand All @@ -2219,8 +2236,8 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep)
RBLen = 2 * StepLog->Entries * DP_DATA_RECV_SIZE;
Stream->RecvDataBuffer = malloc(RBLen);
sst_fi_mr_reg(Fabric->domain, Stream->RecvDataBuffer, RBLen, FI_RECV, 0,
0, 0, &Stream->rbmr, Fabric->ctx,
Fabric->signal);
0, 0, &Stream->rbmr, Fabric->ctx, Fabric->signal,
Fabric->info->domain_attr->mr_mode);
Stream->rbdesc = fi_mr_desc(Stream->rbmr);
RecvBuffer = (uint8_t *)Stream->RecvDataBuffer;
for (i = 0; i < 2 * StepLog->Entries; i++)
Expand All @@ -2247,7 +2264,7 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep)
(sizeof(struct _RdmaBuffer) * RankLog->Entries) +
sizeof(uint64_t),
FI_REMOTE_READ, 0, 0, 0,
&RankLog->preqbmr, Fabric->ctx, Fabric->signal);
&RankLog->preqbmr, Fabric->ctx, Fabric->signal, Fabric->info->domain_attr->mr_mode);
for (j = 0; j < RankLog->Entries; j++)
{
ReqLog = &RankLog->ReqLog[j];
Expand Down Expand Up @@ -2408,7 +2425,7 @@ static void PullSelection(CP_Services Svcs, Rdma_WSR_Stream Stream)
{
sst_fi_mr_reg(Fabric->domain, ReqBuffer.Handle.Block,
ReqBuffer.BufferLen, FI_READ, 0, 0, 0,
&rrmr, Fabric->ctx, Fabric->signal);
&rrmr, Fabric->ctx, Fabric->signal, Fabric->info->domain_attr->mr_mode);
rrdesc = fi_mr_desc(rrmr);
}

Expand Down

0 comments on commit 14ffd0d

Please sign in to comment.