In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import ipyparallel as ipp
def mpi_example():
    """Describe the calling MPI process.

    mpi4py is imported inside the body, so the import runs in whichever
    process executes the function.

    Returns:
        str: One line holding the rank of this process, the number of
        ranks in ``MPI.COMM_WORLD`` and the processor name.
    """
    from mpi4py import MPI

    world = MPI.COMM_WORLD
    rank = world.Get_rank()
    nranks = world.Get_size()
    hostname = MPI.Get_processor_name()
    return f"Hello World from rank {rank}. total ranks={nranks}. host={hostname}"

# request an MPI cluster with 4 engines (n=4)
with ipp.Cluster(controller_ip="*", engines="mpi", n=4) as rc:
    # get a broadcast_view on the cluster which is best
    # suited for MPI style computation
    view = rc.broadcast_view()
    # run the mpi_example function on all engines in parallel;
    # apply_sync blocks until the results are back
    r = view.apply_sync(mpi_example)
    # marker line printed ahead of the per-engine results
    print('hello')
    # print the strings returned by the engines, one per line
    print("\n".join(r))
# at this point, the cluster processes have been shutdown

Starting 4 engines with <class 'ipyparallel.cluster.launcher.MPIEngineSetLauncher'>
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.41s/engine]
hello
Hello World from rank 0. total ranks=4. host=pop-os
Hello World from rank 1. total ranks=4. host=pop-os
Hello World from rank 2. total ranks=4. host=pop-os
Hello World from rank 3. total ranks=4. host=pop-os
Stopping engine(s): 1686989258
engine set stopped 1686989258: {'exit_code': 1, 'pid': 8754, 'identifier': 'ipengine-1686989257-syq9-1686989258-8710'}
Stopping controller
Controller stopped: {'exit_code': 0, 'pid': 8725, 'identifier': 'ipcontroller-1686989257-syq9-8710'}


In [3]:
import numpy as np

# Lattice parameter handed to RealLattice.from_alat further down
alat = 5.107

# NOTE(review): presumably the wavefunction and charge-density cutoffs;
# the second is fixed at four times the first
ecutwfc = 20
ecutrho = ecutwfc * 4

# Three vectors in units of alat. They are typed in as columns and the
# matrix is transposed, so each row of latvec_alat is one vector.
latvec_alat = np.transpose(
    np.array([
        [ 1,  1,  1],
        [-1,  1,  1],
        [-1, -1,  1],
    ]) / 2
)

In [4]:
from qtmpy.lattice import RealLattice

# Build the real-space lattice from the lattice parameter and the three
# rows of latvec_alat (unpacked into separate positional arguments).
reallat = RealLattice.from_alat(alat, *latvec_alat)
# Show every entry of axes_cart with a 1-based label (a1, a2, a3)
for i, ax in enumerate(reallat.axes_cart):
    print(f"a{i+1} : {ax}")

# Check 1: feed the 3x3 identity through cryst2cart.
# NOTE(review): axis=1 presumably selects the array axis that holds the
# vector components; confirm against RealLattice.cryst2cart.
l_vec_cryst = np.eye(3)
l_vec_cart = reallat.cryst2cart(l_vec_cryst, axis=1)

print(l_vec_cart)

# Check 2: convert the lattice's own latvec back with cart2cart's inverse
# (cart2cryst); the recorded result is the identity up to rounding error.
l_vec_cart = reallat.latvec
l_vec_cryst = reallat.cart2cryst(l_vec_cart, axis=0)
print(l_vec_cryst)

# Check 3: same vectors divided by alat, converted with alat2cryst;
# again the recorded result is the identity up to rounding error.
l_vec_cryst = reallat.alat2cryst(l_vec_cart / reallat.alat, axis=0)
print(l_vec_cryst)

a1 : [2.5535, -2.5535, -2.5535]
a2 : [2.5535, 2.5535, -2.5535]
a3 : [2.5535, 2.5535, 2.5535]
[[ 2.5535 -2.5535 -2.5535]
 [ 2.5535  2.5535 -2.5535]
 [ 2.5535  2.5535  2.5535]]
[[ 1.00000000e+00 -5.55111512e-17 -5.55111512e-17]
 [ 0.00000000e+00  1.00000000e+00  1.66533454e-16]
 [-5.55111512e-17  5.55111512e-17  1.00000000e+00]]
[[ 1.00000000e+00 -7.08738623e-17 -7.08738623e-17]
 [ 0.00000000e+00  1.00000000e+00  1.41747725e-16]
 [-7.08738623e-17  7.08738623e-17  1.00000000e+00]]


In [5]:
from qtmpy.config import qtmconfig
from qtmpy.lattice import ReciLattice
from qtmpy.gspace.gspc import GSpace

# Select 'numpy' as the FFT backend on the qtmpy configuration object
qtmconfig.fft_backend = 'numpy'

# Reciprocal lattice derived from the real-space lattice built earlier
recilat = ReciLattice.from_reallat(reallat)
# G-space built from the reciprocal lattice and ecutrho
# NOTE(review): presumably ecutrho is the cutoff that bounds the set of
# G-vectors kept; confirm against GSpace.
gspc = GSpace(recilat, ecutrho)

In [6]:
# Grid dimensions reported by the G-space object
print(gspc.grid_shape)
# Integer array with three rows and one column per G-vector
# NOTE(review): presumably the G-vectors in crystal coordinates; confirm
print(gspc.g_cryst)

(18, 18, 18)
[[ 0  0  0 ... -1 -1 -1]
 [ 0  0  0 ... -1 -1 -1]
 [ 0  1  2 ... -3 -2 -1]]


In [7]:
# Axis along which the sticks are aligned
axis = 2

# Quantities read from the G-space object
grid_shape = gspc.grid_shape
g_cryst = gspc.g_cryst
numg = gspc.size

# A stick is identified by the two coordinates of a G-vector that remain
# once the coordinate along 'axis' is dropped.
isticks = np.delete(g_cryst, axis, axis=0)
ni = np.delete(grid_shape, axis, axis=0)

# Shift negative coordinates up by the grid size along their axis, then
# flatten the two-component label into a single integer.
isticks += np.where(isticks < 0, ni.reshape(2, 1), 0)
isticks = isticks[0] * ni[1] + isticks[1]

# Reorder the G-vectors so that the members of each stick sit next to
# each other.
idxsort = np.argsort(isticks)
g_cryst = g_cryst[:, idxsort]

# np.unique returns the distinct labels in ascending order together
# with how many G-vectors carry each of them.
isticks, lensticks = np.unique(isticks, return_counts=True)
numsticks = isticks.shape[0]

In [8]:
# Number of distinct sticks found
print(len(isticks))
# (debug, disabled) table of stick label vs. number of G-vectors in the stick
#print(np.column_stack((isticks, lensticks)))

229


In [18]:
# np.digitize scratch check (default right=False): each entry of x is
# mapped to the index i with bins[i-1] <= x < bins[i]; values below the
# first edge give 0 and values at or beyond the last edge give len(bins).
x = np.array([0, 1, 5, 10, 11, 3, 10001])
bins = np.array([1, 10, 100, 1000])
inds = np.digitize(x, bins=bins)
inds

array([0, 1, 1, 2, 2, 1, 4])

In [19]:
# np.searchsorted scratch check: with side='left' every entry of y is
# mapped to the first position in the sorted 'bins' where it could be
# inserted while keeping the order (an exact match returns its own index).
bins = np.array([1, 10, 100, 1000])
y = np.array([100, 100, 100, 1, 1, 1000, 10])
print(bins.searchsorted(y, side='left'))

[2 2 2 0 0 3 1]


In [23]:
# np.repeat scratch check: the i-th value is repeated counts[i] times
# (10 five times, 100 once, 1000 twice)
print(np.repeat([10, 100, 1000], [5, 1, 2]))


[  10   10   10   10   10  100 1000 1000]


In [None]:
# np.histogram scratch check: count how many entries of x fall between
# consecutive integer bin edges.
# NOTE(review): this cell has no execution count, and the output saved
# with it lists the edges [1, 2, 3, 4], which does not correspond to the
# bins defined here (np.arange(4 + 1) yields the edges 0..4). Re-run the
# cell to refresh the output.
x = np.array([0, 0, 0, 1, 1, 2])
bins = np.arange(4 + 1, dtype='i8')
print(np.histogram(x, bins))

(array([2, 1, 0]), array([1, 2, 3, 4]))


In [15]:
# Number of G-vectors in each of the first ten sticks
print(lensticks[:10])
# Running total of the stick lengths (first ten entries), displayed as
# the value of the cell
np.cumsum(lensticks)[:10]

[15 14 15 14 13 12 11  8  5  5]


array([ 15,  29,  44,  58,  71,  83,  94, 102, 107, 112])

In [14]:
# Now distributing the G-vectors
# They are distributed based on the sorted order, not the original order
# This allows us to transfer them as vectors using MPI_Alltoallv
numg_global = numg
g_cryst_global = np.copy(g_cryst)
isticks_global = np.copy(isticks)
lensticks_global = np.copy(lensticks)
numsticks_global = numsticks

nproc = 4

# ---- Quantities that do not depend on the process index ----
# They are computed once here instead of on every loop iteration.

# Flattened stick label of every G-vector: drop the coordinate along
# 'axis', shift negative coordinates up by the grid size, and combine the
# two remaining coordinates into one integer.
g_isticks_global = np.delete(g_cryst_global, axis, 0)
ni = np.delete(grid_shape, axis, 0)
g_isticks_global += (g_isticks_global < 0) * ni.reshape((2, 1))
g_isticks_global = ni[1] * g_isticks_global[0] + g_isticks_global[1]

# Number of sticks assigned to each process: an even split, with the
# first (numsticks_global % nproc) processes taking one extra stick.
l_numsticks_world = (numsticks_global // nproc) \
    + (numsticks_global % nproc > np.arange(nproc, dtype='i8'))
# Largest stick label owned by each process
bin_edges = isticks_global[np.cumsum(l_numsticks_world) - 1]
# Index of the process that owns the stick of each G-vector; right=True
# makes a label equal to an edge fall into that edge's own process.
g_global_iproc = np.digitize(g_isticks_global, bin_edges, right=True)

# Integer bin edges 0..nproc used to count G-vectors per owning process
proc_bins = np.arange(nproc + 1, dtype='i8')
# Total number of G-vectors owned by each process.
# NOTE(review): this is the same array for every process. If it is meant
# to serve as the per-source receive counts of MPI_Alltoallv, it would
# have to be computed per sending process instead; confirm the intent.
recvbufspec, _ = np.histogram(g_global_iproc, proc_bins)

for iproc in range(nproc):
    # First construct the local slice of G-vectors (contiguous block of
    # the sorted list; the first numg_global % nproc processes get one more)
    numg_proc = (numg_global // nproc) + (numg_global % nproc > iproc)
    ig_start = iproc * (numg_global // nproc) + min(numg_global % nproc, iproc)
    ig_stop = ig_start + numg_proc
    ig_local = slice(ig_start, ig_stop)

    # Find the list of local sticks (same splitting rule applied to sticks)
    numsticks_proc = (numsticks_global // nproc) + (numsticks_global % nproc > iproc)
    istick_start = iproc * (numsticks_global // nproc) + min(numsticks_global % nproc, iproc)
    istick_stop = istick_start + numsticks_proc
    isticks_local = isticks_global[istick_start:istick_stop]
    #print(isticks_local)

    # Destination process of every G-vector currently held by 'iproc'
    g_local_iproc = g_global_iproc[ig_local]
    print(g_local_iproc)
    # Number of locally held G-vectors that go to each process
    sendbufspec, _ = np.histogram(g_local_iproc, proc_bins)
    print(sendbufspec, recvbufspec)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 

In [35]:
# Split the sticks into nproc contiguous ranges and record where each
# range stops.
nproc = 4
bin_edges = []
# Even share per process, plus the number of sticks left over
sticks_per_proc, sticks_leftover = divmod(numsticks, nproc)
for iproc in range(nproc):
    # The first 'sticks_leftover' processes receive one additional stick
    numsticks_proc = sticks_per_proc + (iproc < sticks_leftover)
    istick_start = iproc * sticks_per_proc + min(sticks_leftover, iproc)
    istick_stop = istick_start + numsticks_proc
    bin_edges.append(istick_stop)
    print(numsticks, numsticks_proc, istick_start, istick_stop)
    print(isticks[istick_start: istick_stop])

229 58 0 58
[ 0  1  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 47 48 49 50 51 52
 53 54 55 56 57 58 59 60 61 62]
229 57 58 115
[ 66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  84  85  86
  87  88  89  90  91  92  93  94  95  96  97  98 103 104 105 106 107 108
 109 110 111 112 113 114 115 116 122 123 124 125 126 127 128 129 130 131
 132 133 142]
229 57 115 172
[143 144 145 146 147 148 149 150 180 192 193 194 195 196 197 198 199 200
 209 210 211 212 213 214 215 216 217 218 219 220 226 227 228 229 230 231
 232 233 234 235 236 237 238 239 244 245 246 247 248 249 250 251 252 253
 254 255 256]
229 57 172 229
[257 258 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 280
 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 298 299 300
 301 302 303 304 305 306 307 308 309 310 311 312 313 316 317 318 319 320
 321 322 323]


In [37]:
# Stop indices (exclusive) into isticks collected for each process by
# the loop that filled bin_edges
print(bin_edges)
# NOTE(review): ax_dist is not read by any cell visible here; presumably
# a process-grid layout for a later experiment. Confirm or remove.
ax_dist = (2, 2)

[58, 115, 172, 229]


In [1]:
import ipyparallel as ipp

# Number of MPI engines requested from ipp.Cluster
nproc = 4
# Global array shape passed to test_pfft
shape = (10, 12, 15)
# Second argument given to DistArray inside test_pfft, one entry per axis.
# In the recorded 4-engine run, axes 0 and 1 are each split in two and
# axis 2 is kept whole on every engine.
distrib = [2, 2, 1]
    
def test_pfft(shape, distrib):
    """Create a distributed array and describe the part held by this rank.

    Both mpi4py and mpi4py_fft are imported inside the body, so the
    imports run in whichever process executes the function.

    Args:
        shape: Global array shape, passed to ``DistArray`` as its first
            argument.
        distrib: Passed to ``DistArray`` as its second positional
            argument; the calls in this notebook give one integer per axis.

    Returns:
        str: ``"<rank>/<size>: <global shape>-><local shape>, <local slice>"``.
    """
    from mpi4py import MPI
    from mpi4py_fft import DistArray

    world = MPI.COMM_WORLD
    nranks = world.Get_size()
    rank = world.Get_rank()

    local = DistArray(shape, distrib, dtype='c16')
    return f"{rank}/{nranks}: {shape}->{local.shape}, {local.local_slice()}"
    
# Run once in the notebook's own process, with [0, 0, 1] in place of distrib
print(test_pfft(shape, [0, 0, 1]))

# request an MPI cluster with nproc engines
with ipp.Cluster(controller_ip="*", engines="mpi", n=nproc) as rc:
    # broadcast view: the call below is sent to every engine
    view = rc.broadcast_view()
    # one result string per engine; apply_sync blocks until they are back
    r = view.apply_sync(test_pfft, shape, distrib)
    print("\n".join(r))

0/1: (10, 12, 15)->(10, 12, 15), (slice(0, 10, None), slice(0, 12, None), slice(0, 15, None))
Starting 4 engines with <class 'ipyparallel.cluster.launcher.MPIEngineSetLauncher'>
100%|██████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.45s/engine]
0/4: (10, 12, 15)->(5, 6, 15), (slice(0, 5, None), slice(0, 6, None), slice(0, 15, None))
1/4: (10, 12, 15)->(5, 6, 15), (slice(0, 5, None), slice(6, 12, None), slice(0, 15, None))
2/4: (10, 12, 15)->(5, 6, 15), (slice(5, 10, None), slice(0, 6, None), slice(0, 15, None))
3/4: (10, 12, 15)->(5, 6, 15), (slice(5, 10, None), slice(6, 12, None), slice(0, 15, None))
Stopping engine(s): 1686721140
engine set stopped 1686721140: {'exit_code': 1, 'pid': 8254, 'identifier': 'ipengine-1686721139-b27l-1686721140-7979'}
Stopping controller
Controller stopped: {'exit_code': 0, 'pid': 8225, 'identifier': 'ipcontroller-1686721139-b27l-7979'}


In [21]:
import numpy as np
import ipyparallel as ipp

# Shape of the buffer allocated in Test.__init__, which reads this global
shape = (10, 10)
# Number of MPI engines requested from ipp.Cluster
nproc = 4


class Test:
    """Small wrapper that owns an int64 array and exposes it via ``__array__``."""

    def __init__(self, val):
        """Create ``buffer`` with the module-level ``shape``, filled with ``val``."""
        self.buffer = np.full(shape, val, dtype='i8')

    def __array__(self):
        """Return the wrapped array itself, not a copy."""
        return self.buffer
    

def test_array(shape=(10, 10)):
    """Check an in-place MPI sum reduction on a rank-filled int64 buffer.

    Every rank fills a buffer of the given shape with its own rank number
    and all ranks sum their buffers in place with ``Allreduce``. Afterwards
    every element must equal ``0 + 1 + ... + (size - 1)``.

    The function is self-contained: its imports and the buffer wrapper
    live in the body. ipyparallel looks up the global names of a remotely
    applied function in the engine's namespace, where notebook-level names
    such as ``np``, ``shape`` and ``Test`` are not defined.

    Args:
        shape: Shape of the per-rank buffer. Defaults to ``(10, 10)``.

    Returns:
        str: ``"<rank>/<size>: <check>"`` where ``check`` is True when
        every element of the reduced buffer has the expected value.
    """
    import numpy as np
    from mpi4py import MPI

    class _RankBuffer:
        # Local mirror of the notebook-level Test wrapper, defined here so
        # that engines do not need the notebook's namespace.

        def __init__(self, val):
            self.buffer = np.full(shape, val, dtype='i8')

        def __array__(self):
            return self.buffer

    comm = MPI.COMM_WORLD
    comm_size, comm_rank = comm.Get_size(), comm.Get_rank()

    arr = _RankBuffer(comm_rank)
    # Sum of all rank numbers 0 .. comm_size - 1
    val_final = (comm_size - 1) * comm_size // 2
    # IN_PLACE: the buffer is both the contribution and the result
    comm.Allreduce(MPI.IN_PLACE, arr.buffer, op=MPI.SUM)
    check = np.all(arr.buffer == val_final)
    return f"{comm_rank}/{comm_size}: {check}"
    
# Run once in the notebook's own process first
print(test_array())

# request an MPI cluster with nproc engines
with ipp.Cluster(controller_ip="*", engines="mpi", n=nproc) as rc:
    # broadcast view: the call below is sent to every engine
    view = rc.broadcast_view()
    # one result string per engine; apply_sync blocks until they are back
    r = view.apply_sync(test_array)
    print("\n".join(r))

AttributeError: 'numpy.ndarray' object has no attribute '__array_namespace__'