# monitor_workflow_and_report_results

Top-level Notebook for monitoring and reporting results of workflow DRS data access scale tests.

In [1]:
import os
import subprocess
import time

from pathlib import Path

# Custom Setup for Execution Environment

## Setup for Michael's Broad laptop.

In [2]:
# Set to True to apply the local (non-Terra) development setup in the cells below.
external_development = True

In [3]:
# TODO Set this differently for running in Terra Jupyter Cloud Environment.
if external_development:
    NOTEBOOK_EXECUTION_DIRECTORY="/Users/mbaumann/Repos/mbaumann-broad/"
    %cd {NOTEBOOK_EXECUTION_DIRECTORY}

/Users/mbaumann/Repos/mbaumann-broad


In [4]:
if external_development:
    # Homebrew "gnubin" directories that must take priority on PATH.
    gnu_tool_dirs = [
        # The processing performed by this Notebook requires grep with PCRE
        # support, which the version of grep that comes with MacOS Monterey
        # does not provide. Install it with homebrew:
        # $ brew install grep
        "/usr/local/opt/grep/libexec/gnubin",
        # The processing performed by this Notebook requires sed "-z" option
        # support, which the version of sed that comes with MacOS Monterey
        # does not provide. Install it with homebrew:
        # $ brew install gnu-sed
        "/usr/local/opt/gnu-sed/libexec/gnubin",
    ]

    path_entries = os.environ['PATH'].split(os.pathsep)
    for gnu_tool_dir in gnu_tool_dirs:
        # Remove any existing occurrence before prepending so that re-running
        # this cell keeps the directory first on PATH without duplicating it.
        path_entries = [entry for entry in path_entries if entry != gnu_tool_dir]
        path_entries.insert(0, gnu_tool_dir)
    os.environ['PATH'] = os.pathsep.join(path_entries)

    # Only supply a default bucket when the environment does not define one.
    if os.environ.get('WORKSPACE_BUCKET') is None:
        # Workspace bucket used by: `DRS and Signed URL Development - Dev`
        WORKSPACE_BUCKET = "gs://fc-b14e50ee-ccbe-4ee9-9aa4-f4e4ff85bc03"
        os.environ['WORKSPACE_BUCKET'] = WORKSPACE_BUCKET

# Manual Input/Configuration

Enter the workflow submission ID, which is available from Job Manager
after the workflow is started.
For example:
```
 WF_SUBMISSION_ID="1a72b974-00c4-4316-86d5-7a7b1045f9ef"
```

In [5]:
# Submission ID of the workflow run to monitor and report on.
WF_SUBMISSION_ID = "1a72b974-00c4-4316-86d5-7a7b1045f9ef"

Enter the start time for the workflow using GMT in this format:
```
2022/02/19 01:33:05
YYYY/MM/DD HH:MM:SS
```

In [6]:
# Workflow start time in GMT, formatted as "YYYY/MM/DD HH:MM:SS".
WF_START_TIME = "2022/02/19 00:55:00"

## Processing Steps to Run

In [7]:
# Each flag enables the correspondingly guarded cell(s) later in this notebook.
monitor_workflow = True
copy_workflow_logs = True
extract_timeseries_data = False
display_results = False

# Constants

In [8]:
# Workspace bucket, read from the environment (raises KeyError if it is not set).
WORKSPACE_BUCKET = os.environ['WORKSPACE_BUCKET']

In [9]:
# Location of this submission within the workspace bucket.
WS_WF_GS_URI = f"{WORKSPACE_BUCKET}/{WF_SUBMISSION_ID}"

In [10]:
# Local directory under which the results of all test runs are stored.
TEST_RESULTS_DIR = './test_results'
# Create the directory (and any missing parents) with pathlib rather than
# shelling out to `mkdir -p`; this is a no-op if it already exists.
Path(TEST_RESULTS_DIR).mkdir(parents=True, exist_ok=True)

In [11]:
# Per-submission results directory, nested under TEST_RESULTS_DIR.
WF_TEST_RESULTS_DIR = f"{TEST_RESULTS_DIR}/submission_{WF_SUBMISSION_ID}"
# Create the directory (and any missing parents) with pathlib rather than
# interpolating the path unquoted into a shell `mkdir -p` command.
Path(WF_TEST_RESULTS_DIR).mkdir(parents=True, exist_ok=True)

In [12]:
# Directory containing the helper scripts and notebooks invoked below.
SCRIPTS = "./terra-workflow-scale-test-tools/scripts"

# Functions

In [13]:
def wait_for_workflow_to_complete(wait_seconds: float = 30) -> None:
    """Placeholder for blocking until the workflow has completed.

    This does not yet check the workflow's status; it only sleeps for a
    fixed duration and then returns.

    Args:
        wait_seconds: Number of seconds to sleep. Defaults to 30, the
            duration that was previously hard-coded.
    """
    # TODO Implement this
    time.sleep(wait_seconds)

# Monitor response times during workflow execution

In [26]:
if monitor_workflow:
    # Start response-time monitoring as a background process. The Popen
    # handle is kept (rather than discarded) so that the monitor can be
    # stopped later instead of being left running with no way to reach it.
    response_time_monitor_process = subprocess.Popen(
        ["python3", f"{SCRIPTS}/monitor_response_times.py"]
    )

    # TODO Once wait_for_workflow_to_complete() is implemented, wait for the
    # workflow and then stop the background monitor:
    # wait_for_workflow_to_complete()
    # response_time_monitor_process.terminate()
    # response_time_monitor_process.wait()

# Copy workflow logs from the workspace bucket to the local filesystem

In [None]:
if copy_workflow_logs:
    workflow_logs_dir = f"{WF_TEST_RESULTS_DIR}/workflow-logs"
    if not Path(workflow_logs_dir).exists():
        ! {SCRIPTS}/copy_workflow_logs_to_local_fs.sh {WS_WF_GS_URI} {workflow_logs_dir}
    else:
        print(f"The workflow-logs directory already exists: {workflow_logs_dir}")
        print("Skipping copy of the workflow logs.")


In [None]:
if extract_timeseries_data:
    # Run the DRS localization timestamp extraction script, passing it the
    # per-submission results directory as its only argument.
    # NOTE(review): presumably this produces drs_localization_timeseries.tsv
    # in that directory — confirm against the script.
    ! {SCRIPTS}/extract_drs_localization_timestamps_from_local_fs.sh {WF_TEST_RESULTS_DIR}
    

In [None]:
if extract_timeseries_data:
    # Run the DRS localization fallback timestamp extraction script, passing
    # it the per-submission results directory as its only argument.
    # NOTE(review): presumably this produces
    # drs_localization_fallback_timeseries.tsv in that directory — confirm
    # against the script.
    ! {SCRIPTS}/extract_drs_localization_fallback_timestamps_from_local_fs.sh {WF_TEST_RESULTS_DIR}

# Display the results of the workflow run

## Workflow DRS localization rates

In [None]:
if display_results:
    # Execute the graphing notebook for the DRS data access rates.
    %run {SCRIPTS}/graph_drs_data_access_rates.ipynb
    # NOTE(review): the two f-strings below are standalone expression
    # statements on their own lines; they are not part of the %run line above
    # and have no effect. They look like intended inputs for the graphing
    # notebook — confirm how that notebook expects to receive these paths.
    f"{WF_TEST_RESULTS_DIR}/drs_localization_timeseries.tsv"
    f"{WF_TEST_RESULTS_DIR}/drs_localization_fallback_timeseries.tsv"

## Service/endpoint response times

In [None]:
if display_results:
    # Execute the graphing notebook for the response time data.
    # NOTE(review): the literal word `TODO` is part of the %run line; it
    # appears to be a placeholder for arguments not yet filled in — confirm
    # and replace before enabling display_results.
    %run {SCRIPTS}/graph_response_time_data.ipynb TODO