Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 20 additions & 66 deletions ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_hints.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
char *value;
int flag;
ADIO_Offset stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1;
struct lov_user_md lum = { 0 };
int err, myrank, fd_sys, perm, amode, old_mask;
int err, myrank;
static char myname[] = "ADIOI_LUSTRE_SETINFO";

value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
Expand All @@ -47,18 +46,25 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* striping information */
ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
value, &flag);
if (flag)
if (flag) {
ADIOI_Info_set(fd->info, "striping_unit", value);
str_unit=atoll(value);
}

ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
value, &flag);
if (flag)
if (flag) {
ADIOI_Info_set(fd->info, "striping_factor", value);
str_factor=atoll(value);
}

ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
MPI_MAX_INFO_VAL, value, &flag);
if (flag)
if (flag) {
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
start_iodev=atoll(value);
}


/* direct read and write */
ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
Expand All @@ -84,72 +90,20 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
}
MPI_Bcast(stripe_val, 3, MPI_OFFSET, 0, fd->comm);

/* do not open file in hint processing. Open file in open routines,
* where we can better deal with EXCL flag . Continue to check the
* "all processors set a value" condition holds. */
if (stripe_val[0] != str_factor
|| stripe_val[1] != str_unit
|| stripe_val[2] != start_iodev) {
FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
"-striping_factor:striping_unit:start_iodevice "
"need to be identical across all processes\n");
MPI_Abort(MPI_COMM_WORLD, 1);
} else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
/* if user has specified striping info, process 0 tries to set it */
if (!myrank) {
if (fd->perm == ADIO_PERM_NULL) {
old_mask = umask(022);
umask(old_mask);
perm = old_mask ^ 0666;
}
else perm = fd->perm;

amode = 0;
if (fd->access_mode & ADIO_CREATE)
amode = amode | O_CREAT;
if (fd->access_mode & ADIO_RDONLY)
amode = amode | O_RDONLY;
if (fd->access_mode & ADIO_WRONLY)
amode = amode | O_WRONLY;
if (fd->access_mode & ADIO_RDWR)
amode = amode | O_RDWR;
if (fd->access_mode & ADIO_EXCL)
amode = amode | O_EXCL;

/* we need to create file so ensure this is set */
amode = amode | O_LOV_DELAY_CREATE | O_CREAT;

fd_sys = open(fd->filename, amode, perm);
if (fd_sys == -1) {
if (errno != EEXIST)
fprintf(stderr,
"Failure to open file %s %d %d\n",strerror(errno), amode, perm);
} else {
lum.lmm_magic = LOV_USER_MAGIC;
lum.lmm_pattern = 0;
lum.lmm_stripe_size = str_unit;
/* crude check for overflow of lustre internal datatypes.
* Silently cap to large value if user provides a value
* larger than lustre supports */
if (lum.lmm_stripe_size != str_unit) {
lum.lmm_stripe_size = UINT_MAX;
}
lum.lmm_stripe_count = str_factor;
if ( lum.lmm_stripe_count != str_factor) {
lum.lmm_stripe_count = USHRT_MAX;
}
lum.lmm_stripe_offset = start_iodev;
if (lum.lmm_stripe_offset != start_iodev) {
lum.lmm_stripe_offset = USHRT_MAX;
}

err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
if (err == -1 && errno != EEXIST) {
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
}
close(fd_sys);
}
} /* End of striping parameters validation */
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME("ADIOI_LUSTRE_SetInfo",
"str_factor or str_unit or start_iodev",
error_code);
ADIOI_Free(value);
return;
}
MPI_Barrier(fd->comm);
}

/* get other hint */
if (users_info != MPI_INFO_NULL) {
/* CO: IO Clients/OST,
Expand Down
144 changes: 103 additions & 41 deletions ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_open.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,17 @@
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
{
int perm, old_mask, amode, amode_direct;
int lumlen;
int lumlen, myrank, flag, set_layout=0, err;
struct lov_user_md *lum = NULL;
char *value;
ADIO_Offset str_factor = -1, str_unit=0, start_iodev=-1;

#if defined(MPICH) || !defined(PRINT_ERR_MSG)
static char myname[] = "ADIOI_LUSTRE_OPEN";
#endif

MPI_Comm_rank(fd->comm, &myrank);

if (fd->perm == ADIO_PERM_NULL) {
old_mask = umask(022);
umask(old_mask);
Expand All @@ -47,46 +50,102 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)

amode_direct = amode | O_DIRECT;

fd->fd_sys = open(fd->filename, amode|O_CREAT, perm);

if (fd->fd_sys != -1) {
int err;

/* get file striping information and set it in info */
/* odd malloc here because lov_user_md contains some fixed data and
* then a list of 'lmm_objects' representing stripe */
lumlen = sizeof(struct lov_user_md) +
MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
/* furthermore, Pascal Deveze reports that, even though we pass a
* "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
* struct are uninitialzed, the call can give an error. calloc in case
* there are other members that must be initialized and in case
* lov_user_md struct changes in future */
lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
lum->lmm_magic = LOV_USER_MAGIC;
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
if (!err) {
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));

fd->hints->striping_unit = lum->lmm_stripe_size;
sprintf(value, "%d", lum->lmm_stripe_size);
ADIOI_Info_set(fd->info, "striping_unit", value);

fd->hints->striping_factor = lum->lmm_stripe_count;
sprintf(value, "%d", lum->lmm_stripe_count);
ADIOI_Info_set(fd->info, "striping_factor", value);

fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
sprintf(value, "%d", lum->lmm_stripe_offset);
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);

ADIOI_Free(value);
}
ADIOI_Free(lum);

if (fd->access_mode & ADIO_APPEND)
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
}
/* odd length here because lov_user_md contains some fixed data and
* then a list of 'lmm_objects' representing stripe */
lumlen = sizeof(struct lov_user_md) +
MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);

value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
/* we already validated in LUSTRE_SetInfo that these are going to be the same */
if (fd->info != MPI_INFO_NULL) {
/* striping information */
ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL,
value, &flag);
if (flag)
str_unit=atoll(value);

ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL,
value, &flag);
if (flag)
str_factor=atoll(value);

ADIOI_Info_get(fd->info, "romio_lustre_start_iodevice",
MPI_MAX_INFO_VAL, value, &flag);
if (flag)
start_iodev=atoll(value);
}
if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0))
set_layout = 1;

/* if hints were set, we need to delay creation of any lustre objects.
* However, if we open the file with O_LOV_DELAY_CREATE and don't call the
* follow-up ioctl, subsequent writes will fail */
if (myrank == 0 && set_layout)
amode = amode | O_LOV_DELAY_CREATE;

fd->fd_sys = open(fd->filename, amode, perm);
if (fd->fd_sys == -1) goto fn_exit;

/* we can only set these hints on new files */
/* It was strange and buggy to open the file in the hint path. Instead,
* we'll apply the file tunings at open time */
if ((amode & O_CREAT) && set_layout ) {
/* if user has specified striping info, process 0 tries to set it */
if (!myrank) {
lum->lmm_magic = LOV_USER_MAGIC;
lum->lmm_pattern = 0;
/* crude check for overflow of lustre internal datatypes.
* Silently cap to large value if user provides a value
* larger than lustre supports */
if (str_unit > UINT_MAX)
lum->lmm_stripe_size = UINT_MAX;
else
lum->lmm_stripe_size = str_unit;

if (str_factor > USHRT_MAX)
lum->lmm_stripe_count = USHRT_MAX;
else
lum->lmm_stripe_count = str_factor;

if (start_iodev > USHRT_MAX)
lum->lmm_stripe_offset = USHRT_MAX;
else
lum->lmm_stripe_offset = start_iodev;
err = ioctl(fd->fd_sys, LL_IOC_LOV_SETSTRIPE, lum);
if (err == -1 && errno != EEXIST) {
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
/* not a fatal error, but user might care to know */
}
} /* End of striping parameters validation */
}

/* Pascal Deveze reports that, even though we pass a
* "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
* struct are uninitialzed, the call can give an error. zero it out in case
* there are other members that must be initialized and in case
* lov_user_md struct changes in future */
memset(lum, 0, lumlen);
lum->lmm_magic = LOV_USER_MAGIC;
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
if (!err) {

fd->hints->striping_unit = lum->lmm_stripe_size;
sprintf(value, "%d", lum->lmm_stripe_size);
ADIOI_Info_set(fd->info, "striping_unit", value);

fd->hints->striping_factor = lum->lmm_stripe_count;
sprintf(value, "%d", lum->lmm_stripe_count);
ADIOI_Info_set(fd->info, "striping_factor", value);

fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
sprintf(value, "%d", lum->lmm_stripe_offset);
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);

}

if (fd->access_mode & ADIO_APPEND)
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);

if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
Expand All @@ -101,6 +160,9 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
fd->direct_write = fd->direct_read = 0;
}
}
fn_exit:
ADIOI_Free(lum);
ADIOI_Free(value);

/* --BEGIN ERROR HANDLING-- */
if (fd->fd_sys == -1 || ((fd->fd_direct == -1) &&
Expand Down