In [1]:
import numpy as np
import random

### Initialization

In [2]:
# Threads per block along x and y (these become the entries of `block`).
BLOCK_X, BLOCK_Y = 16, 16

In [3]:
# Multipliers: one block covers BLOCK_X*BMULT_X by BLOCK_Y*BMULT_Y entries.
BMULT_X, BMULT_Y = 2, 1

In [4]:
# Factor in the total lattice length (llen = 2*ndev*llenLoc);
# presumably the number of devices -- TODO confirm.
ndev = 1

In [5]:
# Lattice extent along x and y.
X, Y = 4096, 4096
# Periods used by the wrap-around tests in the halo / neighbour code
# (presumably the sub-lattice extent -- TODO confirm).
slY, slX = 2048, 2048

In [6]:
# Divisor that turns a per-row spin count into a word count
# (lld = (X/2)/SPIN_X_WORD); presumably spins packed per word -- TODO confirm.
SPIN_X_WORD = 16

In [7]:
# These are sizes that feed index arithmetic further down, so they are
# computed with integer division; `/` would silently turn them into floats.

# Words per row of a single-colour (black or white) lattice
lld = (X // 2) // SPIN_X_WORD

# Number of words in one single-colour section
llenLoc = Y * lld

# Number of words in the whole word lattice (two colours, ndev copies)
llen = 2 * ndev * llenLoc

# X dimension of the word lattice when two words are grouped per element
# (original note: "2 unsigned long long")
dimX = lld // 2

In [8]:
def div_up(a, b):
    """Return the ceiling of ``a / b`` as an ``int``.

    Floor division is used so the result is exact for arbitrarily large
    integers (true division goes through a float and loses precision above
    2**53) and rounds towards +infinity for negative values as well.

    Parameters
    ----------
    a : int or integral-valued float
        Quantity to be covered.
    b : int
        Chunk size; must be non-zero.

    Returns
    -------
    int
        Smallest number of chunks of size ``b`` needed to cover ``a``.

    Raises
    ------
    ZeroDivisionError
        If ``b`` is zero.
    """
    return int((a + b - 1) // b)

# Block grid: number of blocks along x and y needed to cover the lattice
blocks_along_x = div_up(lld/2, BLOCK_X*BMULT_X)
blocks_along_y = div_up(Y, BLOCK_Y*BMULT_Y)
grid = [blocks_along_x, blocks_along_y]

# Thread grid: threads per block along x and y
block = [BLOCK_X, BLOCK_Y]

### Lattice init

In [9]:
# Zero-initialised 1-D float array with llen entries (the whole word lattice)
v_d = np.zeros(int(llen))

In [10]:
# Walk every block of the grid and every thread of each block; each "thread"
# writes BMULT_Y x BMULT_X entries. Every entry stores its own flat index,
# which makes it easy to trace where values end up.
for by in range(grid[1]):
    for bx in range(grid[0]):
        for tx in range(block[0]):
            for ty in range(block[1]):
                # First row / column handled by this thread
                __i = by*BLOCK_Y*BMULT_Y + ty
                __j = bx*BLOCK_X*BMULT_X + tx

                for i in range(BMULT_Y):
                    for j in range(BMULT_X):
                        flat_pos = (__i+i*BLOCK_Y)*lld/2 + __j +j*BLOCK_X
                        v_d[int(flat_pos)] = flat_pos

### Load tile

In [11]:
# Source array whose entries equal their own flat index
v_src = np.arange(int(llenLoc/2))

# Tile buffer: one block's worth of entries plus a one-entry frame on each side
tile_rows = BLOCK_Y*BMULT_Y + 2
tile_cols = BLOCK_X*BMULT_X + 2
tile = np.zeros((tile_rows, tile_cols))

In [19]:
tile[17,:]

array([   0, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033,
       1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044,
       1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
          0])

In [18]:
1+BLOCK_Y*BMULT_Y

17

In [12]:
def load_tile(blkx, blky, TILE_X, TILE_Y, BLOCK_Y, BLOCK_X, slX, slY, dimX, v_src, tile):
    """Fill ``tile`` with one block's entries of ``v_src`` plus a one-entry halo.

    The ``tidy`` / ``tidx`` loops walk every thread index of a
    ``BLOCK_X`` x ``BLOCK_Y`` block. Each thread copies the interior entries
    it owns; threads with ``tidy == 0`` additionally load the top, bottom,
    left and right halo, wrapping around with periods ``slY`` / ``slX``.
    The four corners of ``tile`` are never written.

    Parameters
    ----------
    blkx, blky : int
        Block coordinates; the tile starts at
        ``(blky*TILE_Y, blkx*TILE_X)`` in the source.
    TILE_X, TILE_Y : int
        Interior tile size. NOTE: the strided loops assume these are
        multiples of ``BLOCK_X`` / ``BLOCK_Y``.
    BLOCK_Y, BLOCK_X : int
        Number of threads per block along y and x (note the y-first order).
    slX, slY : int
        Wrap-around periods along x and y, in the same units as the
        x / y offsets into ``v_src``.
    dimX : int
        Row stride of ``v_src`` (entry ``(y, x)`` lives at ``y*dimX + x``).
    v_src : sequence or 1-D array
        Flat source data.
    tile : numpy.ndarray
        2-D buffer of at least ``(TILE_Y + 2, TILE_X + 2)``; modified in place.

    Returns
    -------
    numpy.ndarray
        Integer copy of ``tile`` after loading.
    """
    # Top-left corner of this block's tile; identical for every thread.
    startX = blkx*TILE_X
    startY = blky*TILE_Y

    # Loop over threads in the given block
    for tidy in range(BLOCK_Y):
        for tidx in range(BLOCK_X):

            # Interior: each thread strides over the tile in block-sized steps
            for j in range(0, TILE_Y, BLOCK_Y):
                yoff = startY + j + tidy
                for i in range(0, TILE_X, BLOCK_X):
                    xoff = startX + i + tidx
                    tile[1 + j + tidy][1 + i + tidx] = v_src[yoff*dimX + xoff]

            # Only the first thread row loads the halo
            if tidy != 0:
                continue

            # Top halo row: wrap to the last row of the period at its start
            yoff = startY + slY - 1 if (startY % slY == 0) else startY - 1
            for i in range(0, TILE_X, BLOCK_X):
                xoff = startX + i + tidx
                tile[0][1 + i + tidx] = v_src[yoff*dimX + xoff]

            # Bottom halo row: wrap to the first row of the period at its end
            yoff = startY + TILE_Y - slY if ((startY + TILE_Y) % slY == 0) else startY + TILE_Y
            for i in range(0, TILE_X, BLOCK_X):
                xoff = startX + i + tidx
                tile[1 + TILE_Y][1 + i + tidx] = v_src[yoff*dimX + xoff]

            # Source columns of the left / right halo, with the same wrapping
            left_x = startX + slX - 1 if (startX % slX == 0) else startX - 1
            right_x = startX + TILE_X - slX if ((startX + TILE_X) % slX == 0) else startX + TILE_X

            if BLOCK_X <= TILE_Y:
                # The BLOCK_X threads of this row stride down the tile rows.
                # (Compared against TILE_Y, not a notebook-level global.)
                for j in range(0, TILE_Y, BLOCK_X):
                    yoff = startY + j + tidx
                    tile[1 + j + tidx][0] = v_src[yoff*dimX + left_x]
                    tile[1 + j + tidx][1 + TILE_X] = v_src[yoff*dimX + right_x]
            elif tidx < TILE_Y:
                # More threads than rows: the first TILE_Y threads take one
                # row each, so the side halo is not left unloaded.
                yoff = startY + tidx
                tile[1 + tidx][0] = v_src[yoff*dimX + left_x]
                tile[1 + tidx][1 + TILE_X] = v_src[yoff*dimX + right_x]

    return tile.astype(int)

In [13]:
grid

[2, 256]

In [15]:
# Block coordinates of the tile to inspect
blkx, blky = 0, 0

In [16]:
# Load the tile of block (blkx, blky). The x period is passed as
# (slX/2)/SPIN_X_WORD/2, i.e. converted with the same divisors as dimX.
tile = load_tile(
    blkx, blky,
    BMULT_X*BLOCK_X, BMULT_Y*BLOCK_Y,
    BLOCK_Y, BLOCK_X,
    int((slX/2)/SPIN_X_WORD/2), slY,
    dimX, v_src, tile,
)

In [17]:
tile

array([[     0, 131008, 131009, 131010, 131011, 131012, 131013, 131014,
        131015, 131016, 131017, 131018, 131019, 131020, 131021, 131022,
        131023, 131024, 131025, 131026, 131027, 131028, 131029, 131030,
        131031, 131032, 131033, 131034, 131035, 131036, 131037, 131038,
        131039,      0],
       [    31,      0,      1,      2,      3,      4,      5,      6,
             7,      8,      9,     10,     11,     12,     13,     14,
            15,     16,     17,     18,     19,     20,     21,     22,
            23,     24,     25,     26,     27,     28,     29,     30,
            31,      0],
       [    95,     64,     65,     66,     67,     68,     69,     70,
            71,     72,     73,     74,     75,     76,     77,     78,
            79,     80,     81,     82,     83,     84,     85,     86,
            87,     88,     89,     90,     91,     92,     93,     94,
            95,     64],
       [   159,    128,    129,    130,    131,    132,    13

In [104]:
128/16/2

4.0

### Spin Update

In [16]:
# Destination array of flat indices, plus a 2-D (Y rows) reshape of it
v_dst = np.arange(int(llenLoc/2))
v_dst_rs = v_dst.reshape(Y, int(lld/2))

In [17]:
# Thread coordinates inside the block that the following cells inspect
tidy, tidx = 0, 0

In [18]:
# Colour being processed; only the value "black" is tested for explicitly
COLOR = "black"

In [19]:
# Row / column of the first entry owned by thread (tidx, tidy) of block (blkx, blky)
__i, __j = (
    blky*BMULT_Y*BLOCK_Y + tidy,
    blkx*BMULT_X*BLOCK_X + tidx,
)

In [20]:
# One slot per entry handled by a thread; sized from the multipliers so it
# stays consistent with the BMULT_Y x BMULT_X loops that fill it.
__me = np.zeros((BMULT_Y, BMULT_X))

In [21]:
# Read this thread's own entries straight from the destination array
for i, j in np.ndindex(BMULT_Y, BMULT_X):
    __me[i, j] = v_dst[(__i + i*BLOCK_Y)*dimX + __j + j*BLOCK_X]

In [22]:
# Neighbour buffers (row above, same row, row below); sized from the
# multipliers so they match the BMULT_Y x BMULT_X loops that fill them.
__up = np.zeros((BMULT_Y, BMULT_X))
__ct = np.zeros((BMULT_Y, BMULT_X))
__dw = np.zeros((BMULT_Y, BMULT_X))

In [23]:
# Pick the three vertically adjacent tile entries for every slot of this thread.
# The tile carries a one-entry frame, hence the +1 on the column and the
# 0 / 1 / 2 row offsets for up / centre / down.
for i, j in np.ndindex(BMULT_Y, BMULT_X):
    tile_row = i*BLOCK_Y + tidy
    tile_col = j*BLOCK_X + 1 + tidx
    __up[i][j] = tile[tile_row][tile_col]
    __ct[i][j] = tile[tile_row + 1][tile_col]
    __dw[i][j] = tile[tile_row + 2][tile_col]

In [24]:
# Side neighbour direction depends on row parity and on the colour:
# for "black" it is truthy on even rows, otherwise on odd rows.
if COLOR == "black":
    readBack = not (__i % 2)
else:
    readBack = __i % 2

In [25]:
# Side-neighbour buffer; sized from the multipliers so it matches the
# BMULT_Y x BMULT_X loop that fills it.
__sd = np.zeros((BMULT_Y, BMULT_X))

In [26]:
# Side neighbour: the tile entry left of centre when readBack is truthy,
# otherwise the one right of centre.
for i in range(BMULT_Y):
    for j in range(BMULT_X):
        # Column offsets advance in steps of BLOCK_X, as in the up/centre/down
        # reads (this previously used BLOCK_Y, which is only correct while
        # both block sizes are equal).
        side_col = j*BLOCK_X + (tidx if readBack else 2 + tidx)
        __sd[i][j] = tile[i*BLOCK_Y + 1 + tidy][side_col]

__up = __up.astype(int)
__ct = __ct.astype(int)
__dw = __dw.astype(int)
__me = __me.astype(int)

In [27]:
# Show the thread's own entries next to the neighbours gathered from the tile
for label, values in (
    ("me", __me),
    ("up", __up),
    ("side", __sd),
    ("center", __ct),
    ("down", __dw),
):
    print(label, values)

me [[ 0 16]]
up [[65504 65520]]
side [[31. 15.]]
center [[ 0 16]]
down [[32 48]]


In [28]:
# Locate the first occurrence of this thread's entry in the 2-D view.
# The array is scanned once and both coordinates are taken from that result.
match_rows, match_cols = np.where(v_dst_rs == __me[0][0])
row = match_rows[0]
column = match_cols[0]

In [33]:
# Every second index of the two consecutive 32-wide spans that start at the
# lattice index stored for (row, column)
me_start = tuple_bw_lattice[row, column]
me_x0 = list(range(me_start, me_start + 32, 2))
me_y0 = list(range(me_start + 32, me_start + 64, 2))

In [38]:
me_x0

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]

In [153]:
lattice

array([[      0,       1,       2, ...,    2045,    2046,    2047],
       [   2048,    2049,    2050, ...,    4093,    4094,    4095],
       [   4096,    4097,    4098, ...,    6141,    6142,    6143],
       ...,
       [4188160, 4188161, 4188162, ..., 4190205, 4190206, 4190207],
       [4190208, 4190209, 4190210, ..., 4192253, 4192254, 4192255],
       [4192256, 4192257, 4192258, ..., 4194301, 4194302, 4194303]])

In [116]:
# Same construction for the entry at [17, 0] of tuple_ww_lattice
ct_start = tuple_ww_lattice[17, 0]
ct_x0 = list(range(ct_start, ct_start + 32, 2))
ct_y0 = list(range(ct_start + 32, ct_start + 64, 2))

In [117]:
ct_y0

[34848,
 34850,
 34852,
 34854,
 34856,
 34858,
 34860,
 34862,
 34864,
 34866,
 34868,
 34870,
 34872,
 34874,
 34876,
 34878]

In [111]:
# Same construction for the entry at [17, 1] of tuple_ww_lattice
sd_start = tuple_ww_lattice[17, 1]
sd_x0 = list(range(sd_start, sd_start + 32, 2))
sd_y0 = list(range(sd_start + 32, sd_start + 64, 2))

In [118]:
sd_x0

[34880,
 34882,
 34884,
 34886,
 34888,
 34890,
 34892,
 34894,
 34896,
 34898,
 34900,
 34902,
 34904,
 34906,
 34908,
 34910]

### Binary bit operations

In [166]:
# Y-by-X lattice in which every site holds its own row-major flat index
lattice = np.arange(X*Y).reshape((Y,X))

In [167]:
# Checkerboard split of `lattice` into its two colours.
# Integer buffers are filled with strided slices, which avoids both the
# per-row Python loop and a round trip through float64.
half_x = int(X/2)
black_lattice = np.empty((Y, half_x), dtype=int)
white_lattice = np.empty((Y, half_x), dtype=int)

# Even rows: black takes the even columns, white the odd ones
black_lattice[0::2] = lattice[0::2, 0::2]
white_lattice[0::2] = lattice[0::2, 1::2]

# Odd rows: the colours swap columns
black_lattice[1::2] = lattice[1::2, 1::2]
white_lattice[1::2] = lattice[1::2, 0::2]

In [168]:
# Keep the first site of every group of 16 along x ...
every_16th = slice(None, None, 16)
black_word_lattice = black_lattice[:, every_16th].astype(int)
white_word_lattice = white_lattice[:, every_16th].astype(int)

# ... and then the first of every pair of those groups
every_2nd = slice(None, None, 2)
tuple_bw_lattice = black_word_lattice[:, every_2nd].astype(int)
tuple_ww_lattice = white_word_lattice[:, every_2nd].astype(int)

In [169]:
# Even indices 0..30 and 32..62
me_x0 = list(range(0, 32, 2))
me_y0 = list(range(32, 64, 2))

In [170]:
me_x0

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]

In [171]:
# Odd indices 1..31 and 33..63
ct_x0 = list(range(1, 32, 2))
ct_y0 = list(range(33, 64, 2))

In [172]:
# Every second index of the two 32-wide spans that start at the entry
# stored at [0, 31] of tuple_ww_lattice
sd_start = tuple_ww_lattice[0, 31]
sd_x0 = list(range(sd_start, sd_start + 32, 2))
sd_y0 = list(range(sd_start + 32, sd_start + 64, 2))

In [173]:
# Every second index of the two 32-wide spans that start at index 2048
dw_start = 2048
dw_x0 = list(range(dw_start, dw_start + 32, 2))
dw_y0 = list(range(dw_start + 32, dw_start + 2*32, 2))

In [174]:
# Entries of the last lattice row at even columns 0..30 and 32..62
up_x0 = list(lattice[-1, 0:32:2])
up_y0 = list(lattice[-1, 32:64:2])

In [176]:
lattice

array([[      0,       1,       2, ...,    2045,    2046,    2047],
       [   2048,    2049,    2050, ...,    4093,    4094,    4095],
       [   4096,    4097,    4098, ...,    6141,    6142,    6143],
       ...,
       [4188160, 4188161, 4188162, ..., 4190205, 4190206, 4190207],
       [4190208, 4190209, 4190210, ..., 4192253, 4192254, 4192255],
       [4192256, 4192257, 4192258, ..., 4194301, 4194302, 4194303]])

In [175]:
up_x0

[4192256,
 4192258,
 4192260,
 4192262,
 4192264,
 4192266,
 4192268,
 4192270,
 4192272,
 4192274,
 4192276,
 4192278,
 4192280,
 4192282,
 4192284,
 4192286]

In [62]:
ct_x0

[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]

In [63]:
ct_y0

[33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63]

In [42]:
me_x0

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]

In [None]:
[2047, 1, 3,...,29]

In [43]:
me_y0

[32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62]

### Interactions

In [101]:
# Row of the first entry owned by thread row tidy of block row blky
__i = blky*BMULT_Y*BLOCK_Y + tidy

In [102]:
# True when the row index __i is even
readBack = not(__i%2)

In [103]:
me_x0

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]

In [104]:
me_y0

[32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62]

In [136]:
def _periodic_neighbors(lattice, sites):
    """Collect the four nearest neighbours of each site with wrap-around.

    Parameters
    ----------
    lattice : numpy.ndarray
        2-D array of site values.
    sites : iterable
        Site values to look up; each must occur in ``lattice`` (the first
        occurrence is used).

    Returns
    -------
    list
        Flat list ``[up, down, left, right, up, down, left, right, ...]``
        with four entries per requested site, in input order.

    Raises
    ------
    IndexError
        If a requested value does not occur in ``lattice``.
    """
    n_rows, n_cols = lattice.shape
    found = []
    for s in sites:
        # One scan per site; take the first match
        rows, cols = np.where(lattice == s)
        i, j = rows[0], cols[0]
        # Modular indices implement the periodic boundaries
        found += [
            lattice[(i - 1) % n_rows, j],   # up
            lattice[(i + 1) % n_rows, j],   # down
            lattice[i, (j - 1) % n_cols],   # left
            lattice[i, (j + 1) % n_cols],   # right
        ]
    return found


neighbors_x0 = _periodic_neighbors(lattice, me_x0)
neighbors_y0 = _periodic_neighbors(lattice, me_y0)

In [137]:
# Offsets used in the index shifts of the following cells
# (presumably bits per spin and bits per word -- TODO confirm).
BITXSP, BITXWORD = 4, 64

In [138]:
# 64-slot buffers, one set for the x0 sites and one for the y0 sites
up_x0, up_y0 = np.zeros(64), np.zeros(64)
down_x0, down_y0 = np.zeros(64), np.zeros(64)
ct_x0, ct_y0 = np.zeros(64), np.zeros(64)
sd_x0 = np.zeros(64)

# The neighbour lists hold groups of four (up, down, left, right), so a
# stride of 4 picks one direction. Each direction is written into a slot
# shifted relative to its position in the list:
#   up    (list index 4k)   -> slot 4k+1
#   down  (list index 4k+1) -> slot 4k
#   left  (list index 4k+2) -> slot 4k+3
up_x0[1::4] = neighbors_x0[0:64:4]
up_y0[1::4] = neighbors_y0[0:64:4]

down_x0[0::4] = neighbors_x0[1:64:4]
down_y0[0::4] = neighbors_y0[1:64:4]

ct_x0[3::4] = neighbors_x0[2:64:4]
ct_y0[3::4] = neighbors_y0[2:64:4]

up_x0 = up_x0.astype(int)
down_x0 = down_x0.astype(int)
up_y0 = up_y0.astype(int)
down_y0 = down_y0.astype(int)
ct_x0 = ct_x0.astype(int)
ct_y0 = ct_y0.astype(int)

In [139]:
# Buffers for the right-hand neighbours, which are later added to ct_x0 / ct_y0.
logical_or_x0 = np.zeros(64)
logical_or_y0 = np.zeros(64)

for i in range(64):
    # i % 4 == 3 selects the fourth entry of each group in the neighbour
    # lists, i.e. the `right` neighbour.
    if i%4 == 3:
        # Slot is shifted by BITXSP + 1. With BITXSP = 4 the index is -2 for
        # i == 3; numpy counts a negative index from the end, so that entry
        # lands in slot 62.
        logical_or_x0[i-BITXSP -1] = neighbors_x0[i]
        
        # With BITXWORD = 64 and BITXSP = 4 this condition only holds for
        # i == 3 (slot 62).
        if i+BITXWORD - BITXSP - 1 < 64:
            logical_or_y0[i + BITXWORD - BITXSP - 1] = neighbors_x0[i]
            sd_x0[i+BITXWORD-BITXSP-1] = neighbors_y0[i]
        
        # NOTE: for i == 3 this index (-2) is the same slot 62 written just
        # above, so the x0 value stored there is overwritten by the y0 value.
        logical_or_y0[i-BITXSP -1] = neighbors_y0[i]
        
        
# sd_x0 is written above but is not cast to int here.
logical_or_x0 = logical_or_x0.astype(int)
logical_or_y0 = logical_or_y0.astype(int)

In [140]:
# In-place add: running this cell again adds logical_or_y0 a second time
ct_y0 += logical_or_y0

In [141]:
# In-place add: running this cell again adds logical_or_x0 a second time
ct_x0 += logical_or_x0

In [151]:
ct_x0

array([   0,    0,    3, 2047,    0,    0,    5,    1,    0,    0,    7,
          3,    0,    0,    9,    5,    0,    0,   11,    7,    0,    0,
         13,    9,    0,    0,   15,   11,    0,    0,   17,   13,    0,
          0,   19,   15,    0,    0,   21,   17,    0,    0,   23,   19,
          0,    0,   25,   21,    0,    0,   27,   23,    0,    0,   29,
         25,    0,    0,   31,   27,    0,    0,    1,   29])

In [152]:
ct_y0

array([ 0,  0, 35, 31,  0,  0, 37, 33,  0,  0, 39, 35,  0,  0, 41, 37,  0,
        0, 43, 39,  0,  0, 45, 41,  0,  0, 47, 43,  0,  0, 49, 45,  0,  0,
       51, 47,  0,  0, 53, 49,  0,  0, 55, 51,  0,  0, 57, 53,  0,  0, 59,
       55,  0,  0, 61, 57,  0,  0, 63, 59,  0,  0, 33, 61])

In [144]:
sd_x0

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., 33.,  0.])

In [164]:
# Print the flat offsets of the centre entry and of its up / down / side
# neighbours for every entry handled by the current thread.
for i in range(BMULT_Y):
    yoff = __i + i*BLOCK_Y

    # Row above / below, wrapping around with period slY
    upOff = yoff + slY - 1 if ((yoff%slY) == 0) else yoff - 1
    # Uses slY; the previous name `ysl` is not defined anywhere and raised a
    # NameError as soon as the wrap-around branch was taken.
    dwOff = yoff - slY + 1 if ((yoff+1)%slY == 0) else yoff + 1

    for j in range(BMULT_X):

        xoff = __j + j*BLOCK_X

        # Side neighbour: previous column when readBack is truthy, next
        # column otherwise, wrapping around with period slX
        if readBack:
            sideoff = xoff+ slX - 1 if (xoff%slX) == 0 else xoff -1
        else:
            sideoff = xoff- slX + 1 if ((xoff+1)%slX) == 0 else xoff + 1
        print(yoff*dimX + xoff)
        print(upOff*dimX + xoff)
        print(dwOff*dimX + xoff)
        print(yoff*dimX + sideoff)
        print('------------------')
        

0
65504
32
31
------------------
16
65520
48
15
------------------


In [161]:
v_dst_rs

array([[    0,     1,     2, ...,    29,    30,    31],
       [   32,    33,    34, ...,    61,    62,    63],
       [   64,    65,    66, ...,    93,    94,    95],
       ...,
       [65440, 65441, 65442, ..., 65469, 65470, 65471],
       [65472, 65473, 65474, ..., 65501, 65502, 65503],
       [65504, 65505, 65506, ..., 65533, 65534, 65535]])

### Exp

In [54]:
# 2 x 5 table of exponential factors (filled in the next cell) and the
# temperature value used to build it
exp = np.zeros((2,5))
temp = 2

In [59]:
# Fill the table: row 0 uses the prefactor +2, row 1 the prefactor -2;
# the column index j maps to the values j*2-4 = -4, -2, 0, 2, 4.
for i in range(2):
    prefactor = 2 if i == 0 else -2
    for j in range(5):
        arg = prefactor*(j*2-4)
        if temp > 0:
            exp[i,j] = np.exp(arg*1/temp)
        elif j == 2:
            # Non-positive temperature: the middle column is fixed to 0.5
            exp[i,j] = 0.5
        else:
            exp[i,j] = np.exp(arg)
                
        

In [60]:
exp

array([[1.83156389e-02, 1.35335283e-01, 1.00000000e+00, 7.38905610e+00,
        5.45981500e+01],
       [5.45981500e+01, 7.38905610e+00, 1.00000000e+00, 1.35335283e-01,
        1.83156389e-02]])