In [28]:
from scipy.optimize import linear_sum_assignment
import numpy as np

In [29]:
# Cost matrix handed to the solver: one row per worker, one column per job.
cost = np.array([
    [4, 1, 3],
    [2, 0, 5],
    [3, 2, 2],
])

# Solve the assignment problem; the solver returns matched row/column indices.
row_ind, col_ind = linear_sum_assignment(cost)

# Total cost of the selected (row, column) pairs.
cost[row_ind, col_ind].sum()

5

In [30]:
# Display the row and column index arrays returned by the solver call above.
row_ind, col_ind

(array([0, 1, 2]), array([1, 0, 2]))

In [31]:
# Second example with ties: columns 1 and 2 cost nothing for every row, so
# only the row matched to column 0 contributes to the total.
cost1 = np.array([[4, 0, 0], [2, 0, 0], [1, 0, 0]])
row_ind, col_ind = linear_sum_assignment(cost1)
# Sum the entries of cost1 (the matrix that was just solved), not the
# earlier `cost` matrix, so the total matches this assignment.
cost1[row_ind, col_ind].sum()

1

# Show GPU utilization

In [None]:
from dataclasses import dataclass
import subprocess
import csv

@dataclass(frozen=True)
class Gpu:
    """Immutable record for one GPU row parsed from ``nvidia-smi`` output."""

    # Value of the ``uuid`` query field (fetch() removes the "GPU-" marker).
    uuid: str
    # Value of the ``gpu_name`` query field.
    name: str
    # Value of the ``utilization.gpu`` query field with the "%" sign removed.
    utilization: float

def fetch(servers=('gpu3', 'gpu4', 'gpu5')):
    """Collect per-GPU utilization from remote hosts via ``ssh`` and ``nvidia-smi``.

    Args:
        servers: Host names to query, in order. Defaults to the three hosts
            this function originally had hard-coded.

    Returns:
        A list of ``Gpu`` records, one per parsed CSV row, in server order.
        The uuid has its leading ``"GPU-"`` marker removed and all fields are
        stripped of surrounding whitespace.

    Raises:
        ValueError: If a utilization field cannot be converted to ``float``.

    Note:
        The exit status of ``ssh`` is not checked: a host that produces no
        stdout simply contributes no entries to the result.
    """
    uuid_prefix = 'GPU-'
    gpus = []

    for server in servers:
        # Build argv directly instead of splitting a formatted string, so
        # every argument reaches ssh exactly as written.
        command = [
            'ssh', server, 'nvidia-smi',
            '--query-gpu=uuid,gpu_name,utilization.gpu',
            '--format=csv,noheader',
        ]
        completed = subprocess.run(command, stdout=subprocess.PIPE)
        lines = completed.stdout.decode('utf-8').splitlines()

        # Fields are separated by ", "; skipinitialspace drops the blank after
        # each comma so the GPU name does not start with a space.
        for stat in csv.reader(lines, delimiter=',', skipinitialspace=True):
            if len(stat) < 3:
                # Blank or truncated line: nothing to parse.
                continue

            uuid, name, utilization = (field.strip() for field in stat[:3])

            # str.strip('GPU-') would treat its argument as a set of
            # characters; remove the literal prefix instead.
            if uuid.startswith(uuid_prefix):
                uuid = uuid[len(uuid_prefix):]

            gpus.append(Gpu(
                uuid=uuid,
                name=name,
                utilization=float(utilization.rstrip('%').strip()),
            ))

    return gpus

# Query the default servers and print one Gpu record per line.
for gpu in fetch():
    print(gpu)

# Stream Docker Output

In [1]:
import docker
# Create a Docker client from the environment's settings.
client = docker.from_env()

# Start the Horovod MNIST example in a detached container.
# NOTE(review): the container name is fixed, so re-running this cell
# presumably requires removing the previous container first — confirm.
container = client.containers.run(
    image="horovod/horovod:latest",
    name="tensorflow-mnist-105",
    command="horovodrun -np 2 -H localhost:2 python ./tensorflow2/tensorflow2_keras_mnist.py",
    environment={"NVIDIA_VISIBLE_DEVICES": "0,1"},
    shm_size="1G",
    detach=True,
)

# Wait for the container and print the status dictionary it reports.
status = container.wait()
print(status)

{'Error': None, 'StatusCode': 0}


In [2]:
# Start a second detached container that runs tensorflow2_keras.py, reusing
# the `client` created in the earlier cell.
# NOTE(review): the container name is fixed, so re-running this cell
# presumably requires removing the previous container first — confirm.
container = client.containers.run(
    image="horovod/horovod:latest",
    name="tensorflow-mnist-106",
    command="horovodrun -np 2 -H localhost:2 python ./tensorflow2/tensorflow2_keras.py",
    environment={"NVIDIA_VISIBLE_DEVICES": "0,1"},
    shm_size="1G",
    detach=True,
)

# Wait for the container and print the status dictionary it reports.
status = container.wait()
print(status)

{'Error': None, 'StatusCode': 2}


In [1]:
# Sample record: a server name and a list of GPU indices.
gpus = {
    "server": "gpu3",
    "gpus": [1, 2, 3],
}

# Render the GPU indices as a single comma-separated string.
",".join(map(str, gpus["gpus"]))

'1,2,3'