# WML-A Job Submission via WML-A CLI

Official examples can be found here: https://wmla-console-cpd-wmla.apps.datascienceelite.com/ui/#/cliTools

In [None]:
# Shared settings, exported as environment variables so the `!` shell commands below can read them.

# Root directory holding the files that get staged for job submission.
%env DIR=/userfs
# Namespace string; job/app IDs in the cells below are written as $NAMESPACE-<number>.
%env NAMESPACE=cpd-wmla
# Host handed to the CLI through --rest-host.
%env HOST=wmla-console-cpd-wmla.apps.cpd.mskcc.org
# Forwarded to submitted jobs through --msd-env BASE_URL=...
%env BASE_URL=https://cpd-cpd.apps.cpd.mskcc.org

# Relative path of the WML-A CLI script; invoked below as `python $dlicmd ...`.
%env dlicmd=wmla-utils/dlicmd.py

# Forwarded to submitted jobs through --msd-env VOLUME_DISPLAY_NAME=...
%env VOLUME_DISPLAY_NAME=DeepLIIFData

### Submit Jobs
#### PyTorch (single GPU or multiple GPUs on one node with multithreading)

In [None]:
%env DIR_job_submission=/userfs/job_submission
%env file_exec=train_command_consumer_local.py

!rm -rf $DIR_job_submission
!mkdir -p $DIR_job_submission

!cp -r $DIR/deepliif-repo/deepliif $DIR_job_submission
!cp $DIR/deepliif-repo/* $DIR_job_submission
!cp $DIR/$file_exec $DIR_job_submission
!cp $DIR/monitor_gpu.sh $DIR_job_submission
!cp $DIR/storage_volume_utils.py $DIR_job_submission

In [None]:
!python $dlicmd --exec-start PyTorch --rest-host $HOST --rest-port -1 --jwt-token $USER_ACCESS_TOKEN \
                  --msd-env USER_ACCESS_TOKEN=$USER_ACCESS_TOKEN --msd-env BASE_URL=$BASE_URL \
                  --msd-env VOLUME_DISPLAY_NAME=$VOLUME_DISPLAY_NAME \
                  --workerDeviceNum 1 --workerMemory 8g \
                  --model-dir $DIR_job_submission --model-main $file_exec \
                  --cs-datastore-meta type=fs,data_path=DeepLIIF_Datasets/

### Submit Jobs
#### distPyTorch (multiprocessing using DDP)

In [None]:
%env DIR_job_submission=/userfs/job_submission
%env file_exec=train_command.py

!rm -rf $DIR_job_submission
!mkdir -p $DIR_job_submission

!cp -r $DIR/deepliif-repo/deepliif $DIR_job_submission
!cp $DIR/deepliif-repo/* $DIR_job_submission
!cp $DIR/$file_exec $DIR_job_submission
!cp $DIR/monitor_gpu.sh $DIR_job_submission
!cp $DIR/storage_volume_utils.py $DIR_job_submission

In [None]:
%%writefile /userfs/conf_distPyTorch.py
import os
import torch.distributed as dist
def init_process():
    dist.init_process_group(
        backend='nccl',
        init_method='tcp://' + os.environ['MASTER_ADDR'] + ':' + os.environ['MASTER_PORT'],
        rank=int(os.environ['RANK']),
        world_size=int(os.environ['WORLD_SIZE']))
    
print('------ initiate process group... ------')
init_process()

In [None]:
# cat cannot change file in place, so we create a new one and use it to overwrite cli.py
!cat /userfs/conf_distPyTorch.py $DIR_job_submission/cli.py > $DIR_job_submission/cli_edited.py
!mv $DIR_job_submission/cli_edited.py $DIR_job_submission/cli.py

In [None]:
!python $dlicmd --exec-start distPyTorch --rest-host $HOST --rest-port -1 --jwt-token $USER_ACCESS_TOKEN \
                  --msd-env USER_ACCESS_TOKEN=$USER_ACCESS_TOKEN --msd-env BASE_URL=$BASE_URL \
                  --msd-env VOLUME_DISPLAY_NAME=$VOLUME_DISPLAY_NAME \
                  --numWorker 3 --workerMemory 8g \
                  --model-dir $DIR_job_submission --model-main $file_exec \
                  --cs-datastore-meta type=fs,data_path=DeepLIIF_Datasets/

#### Specify Job/App ID

In [None]:
# Job/app ID read by the log cells below ($APP_ID); edit this to point at the job to inspect.
%env APP_ID=cpd-wmla-212

### Delete Jobs (and associated results/logs)
#### delete one job

In [None]:
# Example (uncomment and replace the job number to use). Note that $dlicmd already ends
# in .py, so no extra suffix is appended to it.
# !python $dlicmd --exec-delete $NAMESPACE-38 --rest-host $HOST --rest-port -1 --jwt-token $USER_ACCESS_TOKEN

#### delete multiple jobs in a loop

In [None]:
# Issue one --exec-delete per job ID $NAMESPACE-<i>, for i from 140 to 159
# (the range end is exclusive). Adjust the range before running.
for i in range(140,160):
    # Export the current index as environment variable `i`.
    %env i=$i
    # Delete request for this job ID.
    !python $dlicmd --exec-delete $NAMESPACE-$i --rest-host $HOST --rest-port -1 --jwt-token $USER_ACCESS_TOKEN 

### Get Job Status

In [None]:
# Example (uncomment and replace the job number to use): query one job with --exec-get.
# !python $dlicmd --exec-get $NAMESPACE-10 --rest-host $HOST --rest-port -1 --jwt-token $USER_ACCESS_TOKEN

### Get Job Log
#### last 10 lines

In [None]:
# Fetch the log of job $APP_ID via --exec-outlogs (per the heading above: the last 10 lines).
!python $dlicmd --exec-outlogs $APP_ID --rest-host $HOST --rest-port -1 --jwt-token $USER_ACCESS_TOKEN

#### full log files

In [None]:
# Fetch the full log of job $APP_ID via --exec-trainerrlogs
# (presumably the training error stream; confirm against the CLI help).
!python $dlicmd --exec-trainerrlogs $APP_ID --rest-host $HOST --rest-port -1 --jwt-token $USER_ACCESS_TOKEN

In [None]:
!python dlicmd --exec-trainoutlogs $APP_ID --rest-host $HOST --rest-port -1 --jwt-token $USER_ACCESS_TOKEN

## Others - Visualizer Application

In [None]:
from ws_applications import display_link

In [None]:
# Calls display_link from ws_applications with no arguments
# (presumably renders a link to the Visualizer application; confirm against that module).
display_link()