In [1]:
import os
import shutil
from glob import glob
import pathlib
import subprocess
import json
import yaml
import jupyter_client
import papermill as pm
from papermill.engines import NBClientEngine
from jinja2 import Template
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client

In [2]:
def get_Cluster(memory="25GB", account="NCGD0011", on_hub=True):
    """return client and cluster"""
    USER = os.environ["USER"]

    cluster = PBSCluster(
        cores=1,
        memory=memory,
        processes=1,
        queue="casper",
        local_directory=f"/glade/scratch/{USER}/dask-workers",
        log_directory=f"/glade/scratch/{USER}/dask-workers",
        resource_spec=f"select=1:ncpus=1:mem={memory}",
        account=account,
        walltime="06:00:00",
        interface="ib0",
    )

    if on_hub:
        jupyterhub_server_name = os.environ.get("JUPYTERHUB_SERVER_NAME", None)
        dashboard_link = (
            "https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status"
        )
        if jupyterhub_server_name:
            dashboard_link = (
                "https://jupyterhub.hpc.ucar.edu/stable/user/"
                + "{USER}"
                + f"/{jupyterhub_server_name}/proxy/"
                + "{port}/status"
            )
    else:
        dashboard_link = "http://localhost:8787/status"

    dask.config.set({"distributed.dashboard.link": dashboard_link})
    return cluster

In [3]:
def get_ClusterClient(memory="25GB", account="NCGD0011", on_hub=True):
    cluster = get_Cluster(memory, account, on_hub)
    client = Client(cluster)
    return cluster, client

In [4]:
cluster, client = get_ClusterClient()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 45225 instead


In [5]:
print(cluster)

PBSCluster(dfaf5b08, 'tcp://10.12.206.49:35149', workers=0, threads=0, memory=0 B)


In [6]:
cluster.scale(2)

In [7]:
client = Client("tcp://10.12.206.49:35149")

In [8]:
client

0,1
Connection method: Direct,
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/eromashkova/proxy/45225/status,

0,1
Comm: tcp://10.12.206.49:35149,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/eromashkova/proxy/45225/status,Total threads: 0
Started: 1 minute ago,Total memory: 0 B


In [9]:
cluster.scale(32)
client

0,1
Connection method: Direct,
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/eromashkova/proxy/45225/status,

0,1
Comm: tcp://10.12.206.49:35149,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/eromashkova/proxy/45225/status,Total threads: 0
Started: 3 minutes ago,Total memory: 0 B


In [10]:
cluster.close()

In [11]:
client.close()

2023-04-24 22:41:04,531 - distributed.client - ERROR - 
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/glade/work/eromashkova/miniconda3/envs/exp-nbscuid-042423/lib/python3.11/site-packages/distributed/comm/core.py", line 292, in connect
    comm = await wait_for(
           ^^^^^^^^^^^^^^^
  File "/glade/work/eromashkova/miniconda3/envs/exp-nbscuid-042423/lib/python3.11/site-packages/distributed/utils.py", line 1845, in wait_for
    return await fut
           ^^^^^^^^^
  File "/glade/work/eromashkova/miniconda3/envs/exp-nbscuid-042423/lib/python3.11/site-packages/distributed/comm/tcp.py", line 511, in connect
    convert_stream_closed_error(self, e)
  File "/glade/work/eromashkova/miniconda3/envs/exp-nbscuid-042423/lib/python3.11/site-packages/distributed/comm/tcp.py", line 142, in convert_stream_closed_error
    raise CommClosedError(f"in {obj}: {exc.__class__._