In [None]:
import dask.distributed as dd
from dask.distributed import Client, LocalCluster, progress
from dask_jobqueue import PBSCluster
from distributed.utils import tmpfile
from dask.distributed import get_worker
import os

In [None]:
import os
# The Jupyter notebook is launched from your $HOME directory, so switch the
# working directory to your username directory under /scratch/vp91, where the
# workshop directory was created.
# os.path.expandvars fills in $USER from the environment; if USER is not set
# the placeholder is left unchanged and os.chdir fails on the missing path.
os.chdir(os.path.expandvars("/scratch/vp91/$USER/"))

In [None]:
# Python interpreter used to start every Dask worker. It is stored in the
# process environment and read back below when the cluster is built.
os.environ['DASK_PYTHON'] = '/scratch/vp91/AAPP2023/dask-python3.9-venv/bin/python3'

# Shell commands inserted into the generated job script ahead of the worker
# launch: load the Python module, then activate the workshop virtualenv.
setup_commands = [
    "module load python3/3.9.2",
    "source /scratch/vp91/AAPP2023/dask-python3.9-venv/bin/activate",
]

# Additional PBS directives: queue, project, and the per-job CPU/memory request.
extra = ['-q normal', '-P vp91', '-l ncpus=48', '-l mem=192GB']

# One PBS job provides 48 cores and 192GB of memory. `processes` is not
# passed, so dask-jobqueue decides how the cores are split into workers.
cluster = PBSCluster(
    # Resources of a single job
    cores=48,
    memory="192GB",
    walltime="00:50:00",
    # Worker runtime environment
    shebang='#!/usr/bin/env bash',
    python=os.environ["DASK_PYTHON"],
    interface="ib0",
    local_directory='$TMPDIR',
    # Job script customisation
    job_script_prologue=setup_commands,
    job_extra_directives=extra,
    # Drop the generated "select" line; resources are requested via `extra`.
    job_directives_skip=["select"],
)

In [None]:
# Show the PBS job script generated from the cluster configuration, so the
# directives and setup commands can be checked before any job is submitted.
print(cluster.job_script())

In [None]:
# Ask the cluster to scale to two PBS jobs.
cluster.scale(jobs=2)

In [None]:
# Display the cluster object to inspect its current state.
cluster

In [None]:
# Shell escape: run the PBS `qstat` command to list the submitted jobs.
!qstat

In [None]:
# Connect a Dask client to the PBS-backed cluster.
client = Client(cluster)

In [None]:
# Display the client to confirm the connection.
client

In [None]:
def slow_increment(x):
    """Return ``x`` incremented by one.

    Despite the name, nothing here introduces a delay; the result is
    computed immediately.
    """
    incremented = x + 1
    return incremented

In [None]:
# Submit one call, slow_increment(5000), to the cluster.
# Note: despite the plural name, `futures` holds the single result of one
# client.submit call.
futures = client.submit(slow_increment, 5000)

In [None]:
# Display the submitted future to check its status.
futures

In [None]:
# Show a progress indicator for the submitted work.
progress(futures)

In [None]:
# Split the feature matrix X and labels y into training and test sets.
# NOTE(review): X and y are not defined in any visible cell; on a fresh kernel
# this cell raises NameError. A data-loading cell appears to be missing —
# confirm where X and y are meant to come from and add it above.
from dask_ml.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train

In [None]:
# Base estimator: a linear classifier trained with stochastic gradient descent
# and an L2 penalty.
# NOTE(review): 'squared_error' is a regression-style loss; confirm it is the
# intended choice for a classifier scored on accuracy.
# NOTE(review): no random_state is passed, so results may differ between runs.
from sklearn.linear_model import SGDClassifier

est = SGDClassifier(loss='squared_error', penalty='l2', tol=1e-3)

In [None]:
# Wrap the estimator in dask-ml's Incremental meta-estimator, scored by
# accuracy.
from dask_ml.wrappers import Incremental

inc = Incremental(est, scoring='accuracy')

In [None]:
# Persist the four splits so they are computed once and kept available,
# instead of being recomputed each time they are used below.
# The persisted collections are bound back to the same names.
import dask
X_train, X_test, y_train, y_test = dask.persist(X_train, X_test, y_train, y_test)

In [None]:
# Compute the distinct label values in y_train as a concrete result; they are
# passed to fit() as `classes` in the cells below.
import dask.array as da
classes = da.unique(y_train).compute()
classes

In [None]:
# Fit the wrapped estimator on the training split, passing the label set
# computed above as `classes`.
inc.fit(X_train, y_train, classes=classes)

In [None]:
# joblib is used below but is not imported in any earlier cell, so import it
# here; otherwise this cell raises NameError on a fresh kernel.
import joblib

# Repeat the fit with joblib's 'dask' backend active, so any joblib-parallel
# work inside the estimator is dispatched to the Dask cluster.
with joblib.parallel_backend('dask'):
    inc.fit(X_train, y_train, classes=classes)