# Infrastructure Setup

In [None]:
import getpass
import os

from chi import context

# Select the CHI@TACC site and choose the project for this session.
context.use_site("CHI@TACC")
context.choose_project()

# `username` prefixes the lease and server names created below; underscores
# are replaced with hyphens. $USER is preferred, but when it is unset or empty
# fall back to getpass.getuser() instead of failing on `None.replace`.
username = (os.environ.get("USER") or getpass.getuser()).replace("_", "-")

## Creating a 4-day lease

In [None]:
from chi import lease
from datetime import timedelta

In [None]:
# Cluster parameters shared by the cells below.
# node_type:      node type requested in the lease's node reservation.
# instance_count: number of nodes reserved (one master, the rest workers).
# network_name:   network the server instances are attached to.
node_type = "gpu_mi100"
instance_count = 2
network_name = "sharednet1"

In [None]:
# Build a 4-day lease holding `instance_count` nodes of `node_type` plus a
# single floating IP.
lease_name = f"{username}-mpi-lease-rocm"
lease_duration = timedelta(days=4)
mpi_lease = lease.Lease(lease_name, duration=lease_duration)

mpi_lease.add_node_reservation(amount=instance_count, node_type=node_type)
mpi_lease.add_fip_reservation(1)

# NOTE(review): idempotent=True presumably reuses an existing lease with the
# same name when this cell is re-run -- confirm against the python-chi docs.
mpi_lease.submit(idempotent=True)

## Creating server instances

In [None]:
from chi import server

In [None]:
# Every node is launched from the same image, on the same network, against the
# lease's first node reservation; only the server name differs.
node_kwargs = dict(
    reservation_id=mpi_lease.node_reservations[0]["id"],
    image_name="Ubuntu22.04-HPC-MPI-Spack-ROCm",
    network_name=network_name,
)

# Master node: record its name, then submit it.
mpi_master = server.Server(f"{username}-mpi-master-rocm", **node_kwargs)
mpi_master_hostname = mpi_master.name
mpi_master.submit(idempotent=True)

# Worker nodes: the remaining `instance_count - 1` instances, numbered from 1.
mpi_workers = []
mpi_worker_hostnames = []
for worker_number in range(1, instance_count):
    mpi_worker = server.Server(
        f"{username}-mpi-worker-{worker_number}-rocm", **node_kwargs
    )
    mpi_worker.submit(idempotent=True)
    mpi_workers.append(mpi_worker)
    mpi_worker_hostnames.append(mpi_worker.name)

In [None]:
# Comma-separated hostnames of every node, master first. Joining one combined
# list avoids a trailing comma when there are no workers (instance_count == 1).
all_hostnames = ",".join([mpi_master_hostname, *mpi_worker_hostnames])

In [None]:
# Take the first floating IP reserved by the lease and attach it to the master
# node; `fip` is reused below for the inventory and the SSH ProxyJump setting.
reserved_fips = mpi_lease.get_reserved_floating_ips()
fip = reserved_fips[0]
mpi_master.associate_floating_ip(fip)

## Create inventory.ini to work with Ansible

In [None]:
# Write the Ansible inventory:
#   [master_node]       -> the master, addressed by the floating IP
#   [worker_nodes]      -> each worker, addressed by its first address on
#                          `network_name`
#   [worker_nodes:vars] -> SSH to workers is proxied through cc@<floating IP>
with open("./inventory.ini", "w") as f:
    worker_entries = [
        f"{w.name} ansible_host={w.addresses[network_name][0]['addr']}"
        for w in mpi_workers
    ]
    inventory_sections = (
        "[master_node]\n",
        f"{mpi_master.name} ansible_host={fip}\n\n",
        "[worker_nodes]\n",
        "\n".join(worker_entries),
        "\n\n",
        "[worker_nodes:vars]\n",
        f"ansible_ssh_common_args='-o ProxyJump=cc@{fip}'",
    )
    f.writelines(inventory_sections)

## Use Ansible to create an MPI Cluster

In [None]:
import ansible_runner
import tempfile
# Run the mpi-cluster.yml playbook against inventory.ini, using a throwaway
# directory as ansible-runner's private data dir.
tmpdir = tempfile.TemporaryDirectory()

# Resolve the SSH config path outside the f-string: nesting double quotes
# inside a double-quoted f-string is a SyntaxError before Python 3.12.
ssh_config_path = os.path.abspath("config")

# NOTE(review): the outcome is not checked here; inspect ansible_run.status /
# ansible_run.rc before relying on the cluster in the cells below.
ansible_run = ansible_runner.run(
    private_data_dir=tmpdir.name,
    inventory=os.path.abspath("inventory.ini"),
    envvars={
        "ANSIBLE_PYTHON_INTERPRETER": "/usr/bin/python3",
        "ANSIBLE_SSH_ARGS": f"-F {ssh_config_path}",
    },
    extravars={
        "spack_packages": ["pdsh"],
    },
    playbook=os.path.abspath("mpi-cluster.yml"),
    verbosity=0,
)

In [None]:
# Copy the example source file and its run script to the master node.
for local_path, remote_path in (
    ("./examples/src/hello.cpp", "/home/cc/hello.cpp"),
    ("./examples/mpi_jobs/run_hello_rocm.sh", "/home/cc/run_hello.sh"),
):
    mpi_master.upload(local_path, remote_path)

In [None]:
# Run the uploaded script on the master in a login shell, passing the master
# hostname and the comma-separated worker hostnames as its two arguments.
worker_csv = ",".join(mpi_worker_hostnames)
remote_command = (
    f'bash -lc "source /home/cc/run_hello.sh {mpi_master_hostname} {worker_csv}"'
)
mpi_master.execute(remote_command)