Skip to content

Commit 79b13f3

Browse files
committed
darray and subarray are now first class citizens in Open MPI. They can be stored
in packed form and reloaded, as any other type (this is mainly for one sided). This commit was SVN r24480.
1 parent 95f4e0b commit 79b13f3

File tree

7 files changed

+432
-370
lines changed

7 files changed

+432
-370
lines changed

ompi/datatype/Makefile.am

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
# University Research and Technology
55
# Corporation. All rights reserved.
6-
# Copyright (c) 2004-2009 The University of Tennessee and The University
6+
# Copyright (c) 2004-2011 The University of Tennessee and The University
77
# of Tennessee Research Foundation. All rights
88
# reserved.
99
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -35,6 +35,8 @@ libdatatype_la_SOURCES = \
3535
ompi_datatype_create_indexed.c \
3636
ompi_datatype_create_struct.c \
3737
ompi_datatype_create_vector.c \
38+
ompi_datatype_create_darray.c \
39+
ompi_datatype_create_subarray.c \
3840
ompi_datatype_external32.c \
3941
ompi_datatype_match_size.c \
4042
ompi_datatype_module.c \

ompi/datatype/ompi_datatype.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,13 @@ OMPI_DECLSPEC int32_t ompi_datatype_create_indexed_block( int count, int bLength
209209
const ompi_datatype_t* oldType, ompi_datatype_t** newType );
210210
OMPI_DECLSPEC int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE* pDisp,
211211
ompi_datatype_t* const* pTypes, ompi_datatype_t** newType );
212+
OMPI_DECLSPEC int32_t ompi_datatype_create_darray( int size, int rank, int ndims, int const* gsize_array,
213+
int const* distrib_array, int const* darg_array,
214+
int const* psize_array, int order, const ompi_datatype_t* oldtype,
215+
ompi_datatype_t** newtype);
216+
OMPI_DECLSPEC int32_t ompi_datatype_create_subarray(int ndims, int const* size_array, int const* subsize_array,
217+
int const* start_array, int order,
218+
const ompi_datatype_t* oldtype, ompi_datatype_t** newtype);
212219
static inline int32_t
213220
ompi_datatype_create_resized( const ompi_datatype_t* oldType, OPAL_PTRDIFF_TYPE lb, OPAL_PTRDIFF_TYPE extent, ompi_datatype_t** newType )
214221
{

ompi/datatype/ompi_datatype_args.c

Lines changed: 10 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -705,48 +705,26 @@ static ompi_datatype_t* __ompi_datatype_create_from_args( int32_t* i, MPI_Aint*
705705
break;
706706
/******************************************************************/
707707
case MPI_COMBINER_SUBARRAY:
708-
/*pos = 1;
709-
pArgs->i[0] = i[0][0];
710-
memcpy( pArgs->i + pos, i[1], pArgs->i[0] * sizeof(int) );
711-
pos += pArgs->i[0];
712-
memcpy( pArgs->i + pos, i[2], pArgs->i[0] * sizeof(int) );
713-
pos += pArgs->i[0];
714-
memcpy( pArgs->i + pos, i[3], pArgs->i[0] * sizeof(int) );
715-
pos += pArgs->i[0];
716-
pArgs->i[pos] = i[4][0];
717-
*/
718-
#if 0
708+
ompi_datatype_create_subarray( i[0], &i[1 + 0 * i[0]], &i[1 + 1 * i[0]],
709+
&i[1 + 2 * i[0]], i[1 + 3 * i[0]],
710+
d[0], &datatype );
719711
{
720-
int* a_i[5]; a_i[0] = &i[0]; a_i[1] = &i[1 + 0 * i[0]]; a_i[2] = &i[1 + 1 * i[0]]; a_i[3] = &i[1 + 2 * i[0]];
712+
int* a_i[5]; a_i[0] = &i[0]; a_i[1] = &i[1 + 0 * i[0]]; a_i[2] = &i[1 + 1 * i[0]]; a_i[3] = &i[1 + 2 * i[0]]; a_i[4] = &i[1 + 3 * i[0]];
721713
ompi_datatype_set_args( datatype, 3 * i[0] + 2, a_i, 0, NULL, 1, d, MPI_COMBINER_SUBARRAY);
722714
}
723-
#endif
724715
break;
725716
/******************************************************************/
726717
case MPI_COMBINER_DARRAY:
727-
/*pos = 3;
728-
pArgs->i[0] = i[0][0];
729-
pArgs->i[1] = i[1][0];
730-
pArgs->i[2] = i[2][0];
731-
732-
memcpy( pArgs->i + pos, i[3], i[2][0] * sizeof(int) );
733-
pos += i[2][0];
734-
memcpy( pArgs->i + pos, i[4], i[2][0] * sizeof(int) );
735-
pos += i[2][0];
736-
memcpy( pArgs->i + pos, i[5], i[2][0] * sizeof(int) );
737-
pos += i[2][0];
738-
memcpy( pArgs->i + pos, i[6], i[2][0] * sizeof(int) );
739-
pos += i[2][0];
740-
pArgs->i[pos] = i[7][0];
741-
*/
742-
#if 0
718+
ompi_datatype_create_darray( i[0] /* size */, i[1] /* rank */, i[2] /* ndims */,
719+
&i[3 + 0 * i[0]], &i[3 + 1 * i[0]],
720+
&i[3 + 2 * i[0]], &i[3 + 3 * i[0]],
721+
i[3 + 4 * i[0]], d[0], &datatype );
743722
{
744723
int* a_i[8]; a_i[0] = &i[0]; a_i[1] = &i[1]; a_i[2] = &i[2];
745-
a_i[3] = &i[1 + 0 * i[0]]; a_i[4] = &i[1 + 1 * i[0]]; a_i[5] = &i[1 + 2 * i[0]];
746-
a_i[6] = &i[1 + 3 * i[0]]; a_i[7] = &i[1 + 4 * i[0]];
724+
a_i[3] = &i[3 + 0 * i[0]]; a_i[4] = &i[3 + 1 * i[0]]; a_i[5] = &i[3 + 2 * i[0]];
725+
a_i[6] = &i[3 + 3 * i[0]]; a_i[7] = &i[3 + 4 * i[0]];
747726
ompi_datatype_set_args( datatype, 4 * i[0] + 4,a_i, 0, NULL, 1, d, MPI_COMBINER_DARRAY);
748727
}
749-
#endif
750728
break;
751729
/******************************************************************/
752730
case MPI_COMBINER_F90_REAL:
Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; -*- */
2+
/*
3+
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
4+
* University Research and Technology
5+
* Corporation. All rights reserved.
6+
* Copyright (c) 2004-2011 The University of Tennessee and The University
7+
* of Tennessee Research Foundation. All rights
8+
* reserved.
9+
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
10+
* University of Stuttgart. All rights reserved.
11+
* Copyright (c) 2004-2006 The Regents of the University of California.
12+
* All rights reserved.
13+
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
14+
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
15+
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
16+
* $COPYRIGHT$
17+
*
18+
* Additional copyrights may follow
19+
*
20+
* $HEADER$
21+
*/
22+
23+
#include "ompi_config.h"
24+
25+
#include <stddef.h>
26+
27+
#include "ompi/constants.h"
28+
#include "ompi/datatype/ompi_datatype.h"
29+
30+
static int
31+
block(const int *gsize_array, int dim, int ndims, int nprocs,
32+
int rank, int darg, int order, ptrdiff_t orig_extent,
33+
ompi_datatype_t *type_old, ompi_datatype_t **type_new,
34+
ptrdiff_t *st_offset)
35+
{
36+
int blksize, global_size, mysize, i, j, rc, start_loop, step;
37+
ptrdiff_t stride;
38+
39+
global_size = gsize_array[dim];
40+
41+
if (darg == MPI_DISTRIBUTE_DFLT_DARG)
42+
blksize = (global_size + nprocs - 1) / nprocs;
43+
else {
44+
blksize = darg;
45+
}
46+
47+
j = global_size - blksize*rank;
48+
mysize = blksize < j ? blksize : j;
49+
if (mysize < 0) mysize = 0;
50+
51+
if (MPI_ORDER_C == order) {
52+
start_loop = ndims - 1 ; step = -1;
53+
} else {
54+
start_loop = 0 ; step = 1;
55+
}
56+
57+
stride = orig_extent;
58+
if (dim == start_loop) {
59+
rc = ompi_datatype_create_contiguous(mysize, type_old, type_new);
60+
if (OMPI_SUCCESS != rc) return rc;
61+
} else {
62+
for (i = start_loop ; i != dim ; i += step) {
63+
stride *= gsize_array[i];
64+
}
65+
rc = ompi_datatype_create_hvector(mysize, 1, stride, type_old, type_new);
66+
if (OMPI_SUCCESS != rc) return rc;
67+
}
68+
69+
*st_offset = blksize * rank;
70+
/* in terms of no. of elements of type oldtype in this dimension */
71+
if (mysize == 0) *st_offset = 0;
72+
73+
return OMPI_SUCCESS;
74+
}
75+
76+
77+
static int
78+
cyclic(const int *gsize_array, int dim, int ndims, int nprocs,
79+
int rank, int darg, int order, ptrdiff_t orig_extent,
80+
ompi_datatype_t* type_old, ompi_datatype_t **type_new,
81+
ptrdiff_t *st_offset)
82+
{
83+
int blksize, i, blklens[2], st_index, end_index, local_size, rem, count, rc;
84+
ptrdiff_t stride, disps[2];
85+
ompi_datatype_t *type_tmp, *types[2];
86+
87+
if (darg == MPI_DISTRIBUTE_DFLT_DARG) {
88+
blksize = 1;
89+
} else {
90+
blksize = darg;
91+
}
92+
93+
st_index = rank * blksize;
94+
end_index = gsize_array[dim] - 1;
95+
96+
if (end_index < st_index) {
97+
local_size = 0;
98+
} else {
99+
local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize;
100+
rem = (end_index - st_index + 1) % (nprocs*blksize);
101+
local_size += rem < blksize ? rem : blksize;
102+
}
103+
104+
count = local_size / blksize;
105+
rem = local_size % blksize;
106+
107+
stride = nprocs*blksize*orig_extent;
108+
if (order == MPI_ORDER_FORTRAN) {
109+
for (i=0; i<dim; i++) {
110+
stride *= gsize_array[i];
111+
}
112+
} else {
113+
for (i=ndims-1; i>dim; i--) {
114+
stride *= gsize_array[i];
115+
}
116+
}
117+
118+
rc = ompi_datatype_create_hvector(count, blksize, stride, type_old, type_new);
119+
if (OMPI_SUCCESS != rc) return rc;
120+
121+
if (rem) {
122+
/* if the last block is of size less than blksize, include
123+
it separately using MPI_Type_struct */
124+
125+
types[0] = *type_new;
126+
types[1] = type_old;
127+
disps[0] = 0;
128+
disps[1] = count*stride;
129+
blklens[0] = 1;
130+
blklens[1] = rem;
131+
132+
rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp);
133+
ompi_datatype_destroy(type_new);
134+
/* even in error condition, need to destroy type_new, so check
135+
for error after destroy. */
136+
if (OMPI_SUCCESS != rc) return rc;
137+
*type_new = type_tmp;
138+
}
139+
140+
/* need to set the UB for block-cyclic to work */
141+
types[0] = *type_new;
142+
types[1] = MPI_UB;
143+
disps[0] = 0;
144+
disps[1] = orig_extent;
145+
if (order == MPI_ORDER_FORTRAN) {
146+
for (i=0; i<=dim; i++) {
147+
disps[1] *= gsize_array[i];
148+
}
149+
} else {
150+
for (i=ndims-1; i>=dim; i--) {
151+
disps[1] *= gsize_array[i];
152+
}
153+
}
154+
blklens[0] = blklens[1] = 1;
155+
rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp);
156+
ompi_datatype_destroy(type_new);
157+
/* even in error condition, need to destroy type_new, so check
158+
for error after destroy. */
159+
if (OMPI_SUCCESS != rc) return rc;
160+
*type_new = type_tmp;
161+
162+
*st_offset = rank * blksize;
163+
/* in terms of no. of elements of type oldtype in this dimension */
164+
if (local_size == 0) *st_offset = 0;
165+
166+
return OMPI_SUCCESS;
167+
}
168+
169+
int32_t ompi_datatype_create_darray(int size,
170+
int rank,
171+
int ndims,
172+
int const* gsize_array,
173+
int const* distrib_array,
174+
int const* darg_array,
175+
int const* psize_array,
176+
int order,
177+
const ompi_datatype_t* oldtype,
178+
ompi_datatype_t** newtype)
179+
{
180+
ompi_datatype_t *lastType;
181+
ptrdiff_t orig_extent, *st_offsets = NULL;
182+
int i, start_loop, end_loop, step;
183+
int *coords = NULL, rc = OMPI_SUCCESS;
184+
185+
/* speedy corner case */
186+
if (ndims < 1) {
187+
/* Don't just return MPI_DATATYPE_NULL as that can't be
188+
MPI_TYPE_FREE()ed, and that seems bad */
189+
*newtype = ompi_datatype_create(0);
190+
ompi_datatype_add(*newtype, &ompi_mpi_datatype_null.dt, 0, 0, 0);
191+
return MPI_SUCCESS;
192+
}
193+
194+
rc = ompi_datatype_type_extent(oldtype, &orig_extent);
195+
if (MPI_SUCCESS != rc) goto cleanup;
196+
197+
/* calculate position in grid using row-major ordering */
198+
{
199+
int tmp_rank = rank, procs = size;
200+
201+
coords = (int *) malloc(ndims * sizeof(int));
202+
for (i = 0 ; i < ndims ; i++) {
203+
procs = procs / psize_array[i];
204+
coords[i] = tmp_rank / procs;
205+
tmp_rank = tmp_rank % procs;
206+
}
207+
}
208+
209+
st_offsets = (ptrdiff_t *) malloc(ndims * sizeof(ptrdiff_t));
210+
211+
/* duplicate type to here to 1) deal with constness without
212+
casting and 2) eliminate need to for conditional destroy below.
213+
Lame, yes. But cleaner code all around. */
214+
rc = ompi_datatype_duplicate(oldtype, &lastType);
215+
if (OMPI_SUCCESS != rc) goto cleanup;
216+
217+
/* figure out ordering issues */
218+
if (MPI_ORDER_C == order) {
219+
start_loop = ndims - 1 ; step = -1; end_loop = -1;
220+
} else {
221+
start_loop = 0 ; step = 1; end_loop = ndims;
222+
}
223+
224+
/* Build up array */
225+
for (i = start_loop ; i != end_loop; i += step) {
226+
int nprocs, tmp_rank;
227+
228+
switch(distrib_array[i]) {
229+
case MPI_DISTRIBUTE_BLOCK:
230+
rc = block(gsize_array, i, ndims, psize_array[i], coords[i],
231+
darg_array[i], order, orig_extent,
232+
lastType, newtype, st_offsets+i);
233+
break;
234+
case MPI_DISTRIBUTE_CYCLIC:
235+
rc = cyclic(gsize_array, i, ndims, psize_array[i], coords[i],
236+
darg_array[i], order, orig_extent,
237+
lastType, newtype, st_offsets+i);
238+
break;
239+
case MPI_DISTRIBUTE_NONE:
240+
/* treat it as a block distribution on 1 process */
241+
if (order == MPI_ORDER_C) {
242+
nprocs = psize_array[i]; tmp_rank = coords[i];
243+
} else {
244+
nprocs = 1; tmp_rank = 0;
245+
}
246+
247+
rc = block(gsize_array, i, ndims, nprocs, tmp_rank,
248+
MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent,
249+
lastType, newtype, st_offsets+i);
250+
break;
251+
default:
252+
rc = MPI_ERR_ARG;
253+
}
254+
ompi_datatype_destroy(&lastType);
255+
/* need to destroy the old type even in error condition, so
256+
don't check return code from above until after cleanup. */
257+
if (MPI_SUCCESS != rc) goto cleanup;
258+
lastType = *newtype;
259+
}
260+
261+
262+
/* set displacement and UB correctly. Use struct instead of
263+
resized for same reason as subarray */
264+
{
265+
ptrdiff_t displs[3];
266+
ompi_datatype_t *types[3];
267+
int tmp_size, blength[3] = { 1, 1, 1};
268+
269+
displs[1] = st_offsets[start_loop];
270+
tmp_size = 1;
271+
for (i = start_loop + step ; i != end_loop ; i += step) {
272+
tmp_size *= gsize_array[i - step];
273+
displs[1] += tmp_size * st_offsets[i];
274+
}
275+
276+
displs[0] = 0;
277+
displs[1] *= orig_extent;
278+
displs[2] = orig_extent;
279+
for (i = 0 ; i < ndims ; i++) {
280+
displs[2] *= gsize_array[i];
281+
}
282+
types[0] = MPI_LB; types[1] = lastType; types[2] = MPI_UB;
283+
284+
rc = ompi_datatype_create_struct(3, blength, displs, types, newtype);
285+
ompi_datatype_destroy(&lastType);
286+
/* need to destroy the old type even in error condition, so
287+
don't check return code from above until after cleanup. */
288+
if (MPI_SUCCESS != rc) goto cleanup;
289+
}
290+
291+
cleanup:
292+
if (NULL != st_offsets) free(st_offsets);
293+
if (NULL != coords) free(coords);
294+
return OMPI_SUCCESS;
295+
}

0 commit comments

Comments
 (0)