In [1]:
## LUMI binding generator

In [10]:
def get_ccd(x, cores_per_ccd=8, nccd=8):
    """
    Return the index of the CCD that contains every set bit of mask x.

    Arguments:
    x             -- A cpu_bind cpu_mask as a non-negative integer
    cores_per_ccd -- number of cores per CCD [=8]
    nccd          -- number of CCDs [=8]

    Returns:
    n -- The number of the CCD on which mask x is located
         (0 for an empty mask).

    Raises:
    AssertionError -- if x is negative, has bits set beyond the last CCD,
                      or spans more than one CCD.
    """
    # All-ones mask for one CCD; derived from cores_per_ccd rather than a
    # hard-coded 0xff so that non-default CCD widths are handled correctly.
    single_ccd = (1 << cores_per_ccd) - 1

    # A valid mask fits entirely in nccd * cores_per_ccd bits. Rejecting
    # negative values also prevents an endless loop below, because
    # right-shifting a negative integer never reaches zero.
    assert 0 <= x < (1 << (nccd * cores_per_ccd)), \
        f'mask {x:#x} does not fit into {nccd} CCDs of {cores_per_ccd} cores'

    # Locate the CCD holding the highest set bit.
    i = 0
    while x >> ((i + 1) * cores_per_ccd):
        i += 1

    # Every set bit must lie inside that one CCD.
    ccd_mask = single_ccd << (i * cores_per_ccd)
    assert x == (x & ccd_mask), f'mask {x:#x} spans more than one CCD'
    return i
def split_by_ccd(x, cores_per_ccd=8):
    """
    Split a binary cpu-mask string into consecutive per-CCD chunks.

    Arguments:
    x             -- a string representing a cpu_mask in binary format,
                     i.e. expected to consist of the characters '0' and '1';
                     its length must be a multiple of cores_per_ccd
    cores_per_ccd -- number of characters (cores) per chunk [=8]

    Returns:
    A list of substrings of x, each cores_per_ccd characters long, in the
    order in which they appear in x.

    Raises:
    AssertionError -- if len(x) is not a multiple of cores_per_ccd.
    """
    assert len(x) % cores_per_ccd == 0
    return [x[i : i + cores_per_ccd] for i in range(0, len(x), cores_per_ccd)]

def translate_mask(mask, i, ncores_per_ccd=8):
    """
    Shift a single-CCD cpu mask onto the i-th CCD and format it as hex.

    Arguments:
    mask           -- a cpu mask (for a single CCD) as a non-negative integer
    i              -- move mask to i-th CCD
    ncores_per_ccd -- number of cores per CCD [=8]; the mask is shifted by
                      i * ncores_per_ccd bits

    Returns:
    The shifted mask as a lowercase hexadecimal string prefixed with '0x'.

    Raises:
    AssertionError -- if mask is negative or does not fit into one CCD.
    """
    # The mask must fit into one CCD of the requested width; a fixed 0xFF
    # bound would be wrong for any ncores_per_ccd other than 8.
    assert 0 <= mask < (1 << ncores_per_ccd)
    val = mask << (ncores_per_ccd * i)
    return f'0x{val:x}'

def duplicate(original_list):
    """Return a new list in which each item of original_list appears twice in a row."""
    doubled = []
    for element in original_list:
        # Emit the element back-to-back so the input order is preserved.
        doubled.append(element)
        doubled.append(element)
    return doubled
    
# Lookup table: CCD index -> GCD index used for that CCD.
ccd_to_gcd = {0: 4, # NUMA 0 -> GPU[4,5]
              1: 5,
              2: 2, # NUMA 1 -> GPU[2,3]
              3: 3,
              4: 6, # NUMA 2 -> GPU[6,7]
              5: 7,
              6: 0, # NUMA 3 -> GPU[0,1]
              7: 1}

def print_bash_commands(data):
    """
    Print bash variable assignments for CPU binding and GPU selection.

    Arguments:
    data -- iterable of dicts, one per rank, each with the keys
            'ccd'  (CCD index, a key of ccd_to_gcd),
            'mask' (cpu mask as a string) and
            'gcd'  (GCD index written to the device map)

    The entries are ordered by the GCD paired with their CCD (ccd_to_gcd),
    then three lines are printed: the CPU_BIND assignment, a comment, and
    the ROCR_VISIBLE_DEVICE_MAP bash array. Nothing is returned.
    """
    # Sort through the module-level ccd_to_gcd table. The original referenced
    # an undefined name (ccd_to_gpu), which raises NameError on a fresh kernel.
    data = sorted(data, key=lambda v: ccd_to_gcd[v['ccd']])

    masks = [entry['mask'] for entry in data]

    # NOTE(review): Slurm's --cpu-bind option spells this bind type
    # 'mask_cpu'; confirm that whatever consumes CPU_BIND expects 'cpu_mask'.
    CPU_BIND = 'CPU_BIND="cpu_mask:' + ','.join(masks) + '"'
    # array to be indexed by $SLURM_LOCALID
    ROCR_VISIBLE_DEVICE_MAP = 'ROCR_VISIBLE_DEVICE_MAP=(' + ' '.join([str(v['gcd']) for v in data]) + ')'

    print(CPU_BIND)
    print('# index by $SLURM_LOCALID')
    print(ROCR_VISIBLE_DEVICE_MAP)




In [None]:
# 8 MPI ranks, 7 OMP threads

In [20]:
# One rank per CCD: cores 1-7 of the CCD (0xfe leaves bit 0 unset).
base_mask = [0xfe]
print([format(v, 'x') for v in base_mask])

# Build one entry per (CCD, mask) pair, then order the entries by the GCD
# index that ccd_to_gcd assigns to their CCD.
data = [
    dict(ccd=ccd, mask=translate_mask(mask, ccd), gcd=ccd_to_gcd[ccd])
    for ccd in range(8)
    for mask in base_mask
]
data.sort(key=lambda entry: ccd_to_gcd[entry['ccd']])
print('num_ranks: ' + str(len(data)))
print_bash_commands(data)


['fe']
num_ranks: 8
CPU_BIND="cpu_mask:0xfe000000000000,0xfe00000000000000,0xfe0000,0xfe000000,0xfe,0xfe00,0xfe00000000,0xfe0000000000"
# index by $SLURM_LOCALID
ROCR_VISIBLE_DEVICE_MAP=(0 1 2 3 4 5 6 7)


In [None]:
# 16 MPI ranks, 3 OMP threads

In [23]:
# Two ranks per CCD: the same 3-bit pattern (0xe) in each half of the CCD.
base_mask = [0xe << shift for shift in (0, 4)]
print('mask on CCD0')
print([format(v, '0>8b') for v in base_mask])

# Build one entry per (CCD, mask) pair, then order the entries by the GCD
# index that ccd_to_gcd assigns to their CCD.
data = [
    dict(ccd=ccd, mask=translate_mask(mask, ccd), gcd=ccd_to_gcd[ccd])
    for ccd in range(8)
    for mask in base_mask
]
data.sort(key=lambda entry: ccd_to_gcd[entry['ccd']])
print('num_ranks: ' + str(len(data)))
print_bash_commands(data)


mask on CCD0
['00001110', '11100000']
num_ranks: 16
CPU_BIND="cpu_mask:0xe000000000000,0xe0000000000000,0xe00000000000000,0xe000000000000000,0xe0000,0xe00000,0xe000000,0xe0000000,0xe,0xe0,0xe00,0xe000,0xe00000000,0xe000000000,0xe0000000000,0xe00000000000"
# index by $SLURM_LOCALID
ROCR_VISIBLE_DEVICE_MAP=(0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7)


In [25]:
# 24 MPI ranks x 2 OMP threads

In [26]:
# Three ranks per CCD: a 2-bit pattern (0x3) at bit offsets 0, 2 and 4.
base_mask = [0x3 << shift for shift in (0, 2, 4)]
print('mask on CCD0')
print([format(v, '0>8b') for v in base_mask])
# Build one entry per (CCD, mask) pair, then order the entries by the GCD
# index that ccd_to_gcd assigns to their CCD.
data = [
    dict(ccd=ccd, mask=translate_mask(mask, ccd), gcd=ccd_to_gcd[ccd])
    for ccd in range(8)
    for mask in base_mask
]
data.sort(key=lambda entry: ccd_to_gcd[entry['ccd']])
print_bash_commands(data)


mask on CCD0
['00000011', '00001100', '00110000']
CPU_BIND="cpu_mask:0x3000000000000,0xc000000000000,0x30000000000000,0x300000000000000,0xc00000000000000,0x3000000000000000,0x30000,0xc0000,0x300000,0x3000000,0xc000000,0x30000000,0x3,0xc,0x30,0x300,0xc00,0x3000,0x300000000,0xc00000000,0x3000000000,0x30000000000,0xc0000000000,0x300000000000"
# index by $SLURM_LOCALID
ROCR_VISIBLE_DEVICE_MAP=(0 0 0 1 1 1 2 2 2 3 3 3 4 4 4 5 5 5 6 6 6 7 7 7)


In [27]:
# 32 MPI ranks x 1 OMP thread

In [29]:
# 4 ranks per ccd, each bound to a single core (bits 0-3 of the CCD)
base_mask = [1 << bit for bit in range(4)]
print('mask on CCD0')
print([format(v, '0>8b') for v in base_mask])
# Build one entry per (CCD, mask) pair, then order the entries by the GCD
# index that ccd_to_gcd assigns to their CCD.
data = [
    dict(ccd=ccd, mask=translate_mask(mask, ccd), gcd=ccd_to_gcd[ccd])
    for ccd in range(8)
    for mask in base_mask
]
data.sort(key=lambda entry: ccd_to_gcd[entry['ccd']])
print_bash_commands(data)


mask on CCD0
['00000001', '00000010', '00000100', '00001000']
CPU_BIND="cpu_mask:0x1000000000000,0x2000000000000,0x4000000000000,0x8000000000000,0x100000000000000,0x200000000000000,0x400000000000000,0x800000000000000,0x10000,0x20000,0x40000,0x80000,0x1000000,0x2000000,0x4000000,0x8000000,0x1,0x2,0x4,0x8,0x100,0x200,0x400,0x800,0x100000000,0x200000000,0x400000000,0x800000000,0x10000000000,0x20000000000,0x40000000000,0x80000000000"
# index by $SLURM_LOCALID
ROCR_VISIBLE_DEVICE_MAP=(0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5 6 6 6 6 7 7 7 7)
