|  | 
|  | 1 | +/* -*- Mode: C; c-basic-offset:4 ; -*- */ | 
|  | 2 | +/* | 
|  | 3 | + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana | 
|  | 4 | + *                         University Research and Technology | 
|  | 5 | + *                         Corporation.  All rights reserved. | 
|  | 6 | + * Copyright (c) 2004-2011 The University of Tennessee and The University | 
|  | 7 | + *                         of Tennessee Research Foundation.  All rights | 
|  | 8 | + *                         reserved. | 
|  | 9 | + * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, | 
|  | 10 | + *                         University of Stuttgart.  All rights reserved. | 
|  | 11 | + * Copyright (c) 2004-2006 The Regents of the University of California. | 
|  | 12 | + *                         All rights reserved. | 
|  | 13 | + * Copyright (c) 2009      Sun Microsystems, Inc. All rights reserved. | 
|  | 14 | + * Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved. | 
|  | 15 | + * Copyright (c) 2010      Cisco Systems, Inc.  All rights reserved. | 
|  | 16 | + * $COPYRIGHT$ | 
|  | 17 | + * | 
|  | 18 | + * Additional copyrights may follow | 
|  | 19 | + * | 
|  | 20 | + * $HEADER$ | 
|  | 21 | + */ | 
|  | 22 | + | 
|  | 23 | +#include "ompi_config.h" | 
|  | 24 | + | 
|  | 25 | +#include <stddef.h> | 
|  | 26 | + | 
|  | 27 | +#include "ompi/constants.h" | 
|  | 28 | +#include "ompi/datatype/ompi_datatype.h" | 
|  | 29 | + | 
|  | 30 | +static int | 
|  | 31 | +block(const int *gsize_array, int dim, int ndims, int nprocs, | 
|  | 32 | +      int rank, int darg, int order, ptrdiff_t orig_extent, | 
|  | 33 | +      ompi_datatype_t *type_old, ompi_datatype_t **type_new, | 
|  | 34 | +      ptrdiff_t *st_offset) | 
|  | 35 | +{ | 
|  | 36 | +    int blksize, global_size, mysize, i, j, rc, start_loop, step; | 
|  | 37 | +    ptrdiff_t stride; | 
|  | 38 | + | 
|  | 39 | +    global_size = gsize_array[dim]; | 
|  | 40 | + | 
|  | 41 | +    if (darg == MPI_DISTRIBUTE_DFLT_DARG) | 
|  | 42 | +        blksize = (global_size + nprocs - 1) / nprocs; | 
|  | 43 | +    else { | 
|  | 44 | +        blksize = darg; | 
|  | 45 | +    } | 
|  | 46 | + | 
|  | 47 | +    j = global_size - blksize*rank; | 
|  | 48 | +    mysize = blksize < j ? blksize : j; | 
|  | 49 | +    if (mysize < 0) mysize = 0; | 
|  | 50 | + | 
|  | 51 | +    if (MPI_ORDER_C == order) { | 
|  | 52 | +        start_loop = ndims - 1 ; step = -1; | 
|  | 53 | +    } else { | 
|  | 54 | +        start_loop = 0 ; step = 1; | 
|  | 55 | +    } | 
|  | 56 | + | 
|  | 57 | +    stride = orig_extent; | 
|  | 58 | +    if (dim == start_loop) { | 
|  | 59 | +        rc = ompi_datatype_create_contiguous(mysize, type_old, type_new); | 
|  | 60 | +        if (OMPI_SUCCESS != rc) return rc; | 
|  | 61 | +    } else { | 
|  | 62 | +        for (i = start_loop ; i != dim ; i += step) { | 
|  | 63 | +            stride *= gsize_array[i]; | 
|  | 64 | +        } | 
|  | 65 | +        rc = ompi_datatype_create_hvector(mysize, 1, stride, type_old, type_new); | 
|  | 66 | +        if (OMPI_SUCCESS != rc) return rc; | 
|  | 67 | +    } | 
|  | 68 | + | 
|  | 69 | +    *st_offset = blksize * rank; | 
|  | 70 | +    /* in terms of no. of elements of type oldtype in this dimension */ | 
|  | 71 | +    if (mysize == 0) *st_offset = 0; | 
|  | 72 | + | 
|  | 73 | +    return OMPI_SUCCESS; | 
|  | 74 | +} | 
|  | 75 | + | 
|  | 76 | + | 
|  | 77 | +static int | 
|  | 78 | +cyclic(const int *gsize_array, int dim, int ndims, int nprocs, | 
|  | 79 | +       int rank, int darg, int order, ptrdiff_t orig_extent, | 
|  | 80 | +       ompi_datatype_t* type_old, ompi_datatype_t **type_new, | 
|  | 81 | +       ptrdiff_t *st_offset) | 
|  | 82 | +{ | 
|  | 83 | +    int blksize, i, blklens[2], st_index, end_index, local_size, rem, count, rc; | 
|  | 84 | +    ptrdiff_t stride, disps[2]; | 
|  | 85 | +    ompi_datatype_t *type_tmp, *types[2]; | 
|  | 86 | + | 
|  | 87 | +    if (darg == MPI_DISTRIBUTE_DFLT_DARG) { | 
|  | 88 | +        blksize = 1; | 
|  | 89 | +    } else { | 
|  | 90 | +        blksize = darg; | 
|  | 91 | +    } | 
|  | 92 | + | 
|  | 93 | +    st_index = rank * blksize; | 
|  | 94 | +    end_index = gsize_array[dim] - 1; | 
|  | 95 | + | 
|  | 96 | +    if (end_index < st_index) { | 
|  | 97 | +        local_size = 0; | 
|  | 98 | +    } else { | 
|  | 99 | +        local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize; | 
|  | 100 | +        rem = (end_index - st_index + 1) % (nprocs*blksize); | 
|  | 101 | +        local_size += rem < blksize ? rem : blksize; | 
|  | 102 | +    } | 
|  | 103 | + | 
|  | 104 | +    count = local_size / blksize; | 
|  | 105 | +    rem = local_size % blksize; | 
|  | 106 | + | 
|  | 107 | +    stride = nprocs*blksize*orig_extent; | 
|  | 108 | +    if (order == MPI_ORDER_FORTRAN) { | 
|  | 109 | +        for (i=0; i<dim; i++) { | 
|  | 110 | +            stride *= gsize_array[i]; | 
|  | 111 | +        } | 
|  | 112 | +    } else { | 
|  | 113 | +        for (i=ndims-1; i>dim; i--) { | 
|  | 114 | +            stride *= gsize_array[i]; | 
|  | 115 | +        } | 
|  | 116 | +    } | 
|  | 117 | + | 
|  | 118 | +    rc = ompi_datatype_create_hvector(count, blksize, stride, type_old, type_new); | 
|  | 119 | +    if (OMPI_SUCCESS != rc) return rc; | 
|  | 120 | + | 
|  | 121 | +    if (rem) { | 
|  | 122 | +        /* if the last block is of size less than blksize, include | 
|  | 123 | +           it separately using MPI_Type_struct */ | 
|  | 124 | + | 
|  | 125 | +        types[0] = *type_new; | 
|  | 126 | +        types[1] = type_old; | 
|  | 127 | +        disps[0] = 0; | 
|  | 128 | +        disps[1] = count*stride; | 
|  | 129 | +        blklens[0] = 1; | 
|  | 130 | +        blklens[1] = rem; | 
|  | 131 | + | 
|  | 132 | +        rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp); | 
|  | 133 | +        ompi_datatype_destroy(type_new); | 
|  | 134 | +        /* even in error condition, need to destroy type_new, so check | 
|  | 135 | +           for error after destroy. */ | 
|  | 136 | +        if (OMPI_SUCCESS != rc) return rc; | 
|  | 137 | +        *type_new = type_tmp; | 
|  | 138 | +    } | 
|  | 139 | + | 
|  | 140 | +    /* need to set the UB for block-cyclic to work */ | 
|  | 141 | +    types[0] = *type_new; | 
|  | 142 | +    types[1] = MPI_UB; | 
|  | 143 | +    disps[0] = 0; | 
|  | 144 | +    disps[1] = orig_extent; | 
|  | 145 | +    if (order == MPI_ORDER_FORTRAN) { | 
|  | 146 | +        for (i=0; i<=dim; i++) { | 
|  | 147 | +            disps[1] *= gsize_array[i]; | 
|  | 148 | +        } | 
|  | 149 | +    } else { | 
|  | 150 | +        for (i=ndims-1; i>=dim; i--) { | 
|  | 151 | +            disps[1] *= gsize_array[i]; | 
|  | 152 | +        } | 
|  | 153 | +    } | 
|  | 154 | +    blklens[0] = blklens[1] = 1; | 
|  | 155 | +    rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp); | 
|  | 156 | +    ompi_datatype_destroy(type_new); | 
|  | 157 | +    /* even in error condition, need to destroy type_new, so check | 
|  | 158 | +       for error after destroy. */ | 
|  | 159 | +    if (OMPI_SUCCESS != rc) return rc; | 
|  | 160 | +    *type_new = type_tmp; | 
|  | 161 | + | 
|  | 162 | +    *st_offset = rank * blksize; | 
|  | 163 | +    /* in terms of no. of elements of type oldtype in this dimension */ | 
|  | 164 | +    if (local_size == 0) *st_offset = 0; | 
|  | 165 | + | 
|  | 166 | +    return OMPI_SUCCESS; | 
|  | 167 | +} | 
|  | 168 | + | 
|  | 169 | +int32_t ompi_datatype_create_darray(int size, | 
|  | 170 | +                                    int rank, | 
|  | 171 | +                                    int ndims, | 
|  | 172 | +                                    int const* gsize_array, | 
|  | 173 | +                                    int const* distrib_array, | 
|  | 174 | +                                    int const* darg_array, | 
|  | 175 | +                                    int const* psize_array,  | 
|  | 176 | +                                    int order, | 
|  | 177 | +                                    const ompi_datatype_t* oldtype, | 
|  | 178 | +                                    ompi_datatype_t** newtype) | 
|  | 179 | +{ | 
|  | 180 | +    ompi_datatype_t *lastType; | 
|  | 181 | +    ptrdiff_t orig_extent, *st_offsets = NULL; | 
|  | 182 | +    int i, start_loop, end_loop, step; | 
|  | 183 | +    int *coords = NULL, rc = OMPI_SUCCESS; | 
|  | 184 | + | 
|  | 185 | +    /* speedy corner case */ | 
|  | 186 | +    if (ndims < 1) { | 
|  | 187 | +        /* Don't just return MPI_DATATYPE_NULL as that can't be | 
|  | 188 | +           MPI_TYPE_FREE()ed, and that seems bad */ | 
|  | 189 | +        *newtype = ompi_datatype_create(0); | 
|  | 190 | +        ompi_datatype_add(*newtype, &ompi_mpi_datatype_null.dt, 0, 0, 0); | 
|  | 191 | +        return MPI_SUCCESS; | 
|  | 192 | +    } | 
|  | 193 | + | 
|  | 194 | +    rc = ompi_datatype_type_extent(oldtype, &orig_extent); | 
|  | 195 | +    if (MPI_SUCCESS != rc) goto cleanup; | 
|  | 196 | + | 
|  | 197 | +    /* calculate position in grid using row-major ordering */ | 
|  | 198 | +    { | 
|  | 199 | +        int tmp_rank = rank, procs = size; | 
|  | 200 | + | 
|  | 201 | +        coords = (int *) malloc(ndims * sizeof(int)); | 
|  | 202 | +        for (i = 0 ; i < ndims ; i++) { | 
|  | 203 | +            procs = procs / psize_array[i]; | 
|  | 204 | +            coords[i] = tmp_rank / procs; | 
|  | 205 | +            tmp_rank = tmp_rank % procs; | 
|  | 206 | +        } | 
|  | 207 | +    } | 
|  | 208 | + | 
|  | 209 | +    st_offsets = (ptrdiff_t *) malloc(ndims * sizeof(ptrdiff_t)); | 
|  | 210 | + | 
|  | 211 | +    /* duplicate type to here to 1) deal with constness without | 
|  | 212 | +       casting and 2) eliminate need to for conditional destroy below. | 
|  | 213 | +       Lame, yes.  But cleaner code all around. */ | 
|  | 214 | +    rc = ompi_datatype_duplicate(oldtype, &lastType); | 
|  | 215 | +    if (OMPI_SUCCESS != rc) goto cleanup; | 
|  | 216 | + | 
|  | 217 | +    /* figure out ordering issues */ | 
|  | 218 | +    if (MPI_ORDER_C == order) { | 
|  | 219 | +        start_loop = ndims - 1 ; step = -1; end_loop = -1; | 
|  | 220 | +    } else { | 
|  | 221 | +        start_loop = 0 ; step = 1; end_loop = ndims; | 
|  | 222 | +    } | 
|  | 223 | + | 
|  | 224 | +    /* Build up array */ | 
|  | 225 | +    for (i = start_loop ; i != end_loop; i += step) { | 
|  | 226 | +        int nprocs, tmp_rank; | 
|  | 227 | + | 
|  | 228 | +        switch(distrib_array[i]) { | 
|  | 229 | +        case MPI_DISTRIBUTE_BLOCK: | 
|  | 230 | +            rc = block(gsize_array, i, ndims, psize_array[i], coords[i], | 
|  | 231 | +                       darg_array[i], order, orig_extent, | 
|  | 232 | +                       lastType, newtype, st_offsets+i); | 
|  | 233 | +            break; | 
|  | 234 | +        case MPI_DISTRIBUTE_CYCLIC: | 
|  | 235 | +            rc = cyclic(gsize_array, i, ndims, psize_array[i], coords[i], | 
|  | 236 | +                        darg_array[i], order, orig_extent, | 
|  | 237 | +                        lastType, newtype, st_offsets+i); | 
|  | 238 | +            break; | 
|  | 239 | +        case MPI_DISTRIBUTE_NONE: | 
|  | 240 | +            /* treat it as a block distribution on 1 process */ | 
|  | 241 | +            if (order == MPI_ORDER_C) { | 
|  | 242 | +                nprocs = psize_array[i]; tmp_rank = coords[i]; | 
|  | 243 | +            } else { | 
|  | 244 | +                nprocs = 1; tmp_rank = 0; | 
|  | 245 | +            } | 
|  | 246 | + | 
|  | 247 | +            rc = block(gsize_array, i, ndims, nprocs, tmp_rank, | 
|  | 248 | +                       MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent, | 
|  | 249 | +                       lastType, newtype, st_offsets+i); | 
|  | 250 | +            break; | 
|  | 251 | +        default: | 
|  | 252 | +            rc = MPI_ERR_ARG; | 
|  | 253 | +        } | 
|  | 254 | +        ompi_datatype_destroy(&lastType); | 
|  | 255 | +        /* need to destroy the old type even in error condition, so | 
|  | 256 | +           don't check return code from above until after cleanup. */ | 
|  | 257 | +        if (MPI_SUCCESS != rc) goto cleanup; | 
|  | 258 | +        lastType = *newtype; | 
|  | 259 | +    } | 
|  | 260 | + | 
|  | 261 | + | 
|  | 262 | +    /* set displacement and UB correctly.  Use struct instead of | 
|  | 263 | +       resized for same reason as subarray */ | 
|  | 264 | +    { | 
|  | 265 | +        ptrdiff_t displs[3]; | 
|  | 266 | +        ompi_datatype_t *types[3]; | 
|  | 267 | +        int tmp_size, blength[3] = { 1, 1, 1}; | 
|  | 268 | + | 
|  | 269 | +        displs[1] = st_offsets[start_loop]; | 
|  | 270 | +        tmp_size = 1; | 
|  | 271 | +        for (i = start_loop + step ; i != end_loop ; i += step) { | 
|  | 272 | +            tmp_size *= gsize_array[i - step]; | 
|  | 273 | +            displs[1] += tmp_size * st_offsets[i]; | 
|  | 274 | +        } | 
|  | 275 | + | 
|  | 276 | +        displs[0] = 0; | 
|  | 277 | +        displs[1] *= orig_extent; | 
|  | 278 | +        displs[2] = orig_extent; | 
|  | 279 | +        for (i = 0 ; i < ndims ; i++) { | 
|  | 280 | +            displs[2] *= gsize_array[i]; | 
|  | 281 | +        } | 
|  | 282 | +        types[0] = MPI_LB; types[1] = lastType; types[2] = MPI_UB; | 
|  | 283 | + | 
|  | 284 | +        rc = ompi_datatype_create_struct(3, blength, displs, types, newtype); | 
|  | 285 | +        ompi_datatype_destroy(&lastType); | 
|  | 286 | +        /* need to destroy the old type even in error condition, so | 
|  | 287 | +           don't check return code from above until after cleanup. */ | 
|  | 288 | +        if (MPI_SUCCESS != rc) goto cleanup; | 
|  | 289 | +    } | 
|  | 290 | + | 
|  | 291 | + cleanup: | 
|  | 292 | +    if (NULL != st_offsets) free(st_offsets); | 
|  | 293 | +    if (NULL != coords) free(coords); | 
|  | 294 | +    return OMPI_SUCCESS; | 
|  | 295 | +} | 
0 commit comments