# monitor_workflow_and_report_results

Top-level Notebook for monitoring and reporting results of workflow DRS data access scale tests.

In [None]:
import os
import time
from datetime import datetime, timezone
from pathlib import Path

import psutil

# Custom Setup for Execution Environment

## Setup for Michael's Broad laptop.

In [None]:
# Enable external development setup.
# When True, the cells below apply machine-specific configuration: they change
# the working directory, prepend GNU tool directories to PATH, and provide a
# default WORKSPACE_BUCKET if one is not already set in the environment.
external_development=True

In [None]:
# TODO Set this differently for running in Terra Jupyter Cloud Environment.
# Change into the directory against which the relative paths used later in
# this Notebook (./test_results, ./terra-workflow-scale-test-tools/...) are
# resolved.
if external_development:
    NOTEBOOK_EXECUTION_DIRECTORY="/Users/mbaumann/Repos/mbaumann-broad/"
    %cd {NOTEBOOK_EXECUTION_DIRECTORY}

In [None]:
# Prepend Homebrew-installed GNU tools to PATH and supply a default workspace
# bucket when running outside of Terra.
# NOTE(review): the /usr/local/opt prefix below is where Homebrew installs on
# Intel Macs; Apple Silicon installs typically use /opt/homebrew - confirm for
# the machine in use.
if external_development:
    # The processing performed by this Notebook requires grep with PCRE
    # support, which the version of grep that comes with MacOS Monterey
    # does not provide. Therefore, install it with homebrew and add to PATH:
    # $ brew install grep
    os.environ['PATH'] = f"/usr/local/opt/grep/libexec/gnubin:{os.environ['PATH']}"

    # The processing performed by this Notebook requires sed "-z" option
    # support, which the version of sed that comes with MacOS Monterey
    # does not provide. Therefore, install it with homebrew and add to PATH:
    # $ brew install gnu-sed
    os.environ['PATH'] = f"/usr/local/opt/gnu-sed/libexec/gnubin:{os.environ['PATH']}"

    # The processing performed by this Notebook requires xargs "-a" option
    # support, which the version of xargs that comes with MacOS Monterey
    # does not provide. Therefore, install it with homebrew and add to PATH:
    # $ brew install findutils
    os.environ['PATH'] = f"/usr/local/opt/findutils/libexec/gnubin:{os.environ['PATH']}"

    # Only supply a default bucket when the environment does not already
    # define one.
    if os.environ.get('WORKSPACE_BUCKET') is None:
        # Workspace bucket used by: `DRS and Signed URL Development - Dev`
        # WORKSPACE_BUCKET="gs://fc-b14e50ee-ccbe-4ee9-9aa4-f4e4ff85bc03"
        WORKSPACE_BUCKET="gs://fc-26863db0-1fe6-463b-a05b-9f8c8cb33dac"
        os.environ['WORKSPACE_BUCKET'] = WORKSPACE_BUCKET

In [None]:
# Display the effective PATH so that any directories prepended above can be verified.
os.environ['PATH']

# Manual Input/Configuration

Enter the workflow submission id, which is available from Job Manager
after the workflow is started.
For example:
```
 WF_SUBMISSION_ID="1a72b974-00c4-4316-86d5-7a7b1045f9ef"
```

In [None]:
#TODO Uncomment this line when using as a template Notebook
# WF_SUBMISSION_ID = "<Enter workflow submission id as shown above.>"
#WF_SUBMISSION_ID="1a72b974-00c4-4316-86d5-7a7b1045f9ef"
# md5_n_by_m_scatter 100 files
# The submission id is used below to build both the workspace bucket URI for
# the workflow and the name of the local results directory.
WF_SUBMISSION_ID="73942d03-98bb-4692-b0fb-cbefc2897d1a"

Enter the start time for the workflow using UTC/GMT in this format:
```
2022/02/19 01:33:05
YYYY/MM/DD HH:MM:SS
```

For example, the current time in UTC in this format is:

In [None]:
# Show the current UTC time in the expected format. Uses a timezone-aware
# "now" because datetime.utcnow() is deprecated as of Python 3.12; the
# formatted string is the same.
datetime.now(timezone.utc).strftime("%Y/%m/%d %H:%M:%S")

In [None]:
#TODO Uncomment this line when using as a template Notebook
# WF_START_TIME = "<Enter UTC date/time in the format above.>"
# Default to the current UTC time. A timezone-aware "now" is used because
# datetime.utcnow() is deprecated as of Python 3.12; the resulting string
# (YYYY/MM/DD HH:MM:SS, no zone suffix) is unchanged.
WF_START_TIME = datetime.now(timezone.utc).strftime("%Y/%m/%d %H:%M:%S")

## Processing Steps to Run

In [None]:
# Each flag enables one stage of the processing performed further below.
# Start the response-time monitor, wait for the workflow, then stop the monitor.
monitor_workflow=True
# Copy the workflow logs from the workspace bucket to the local filesystem.
copy_workflow_logs=True
# Run the scripts that extract DRS localization timestamps.
extract_timeseries_data=True
# Run the graphing notebooks that display the results.
display_results=True

# Constants

## Data Access Rate Constants

In [None]:
# Workspace bucket URI, read from the environment (raises KeyError if the
# WORKSPACE_BUCKET environment variable is not set).
WORKSPACE_BUCKET=os.environ['WORKSPACE_BUCKET']
WORKSPACE_BUCKET

In [None]:
# Location in the workspace bucket for this workflow submission; passed as the
# source when copying the workflow logs.
WS_WF_GS_URI=f"{WORKSPACE_BUCKET}/{WF_SUBMISSION_ID}"
WS_WF_GS_URI

In [None]:
# Absolute path of the local directory holding results for all submissions,
# resolved relative to the current working directory and created if missing.
TEST_RESULTS_DIR=Path('./test_results').resolve().as_posix()
! mkdir -p "{TEST_RESULTS_DIR}"
TEST_RESULTS_DIR

In [None]:
# Per-submission results directory under TEST_RESULTS_DIR, created if missing.
WF_TEST_RESULTS_DIR=os.path.join(TEST_RESULTS_DIR, f"submission_{WF_SUBMISSION_ID}")
! mkdir -p "{WF_TEST_RESULTS_DIR}"
WF_TEST_RESULTS_DIR

In [None]:
# Local destination for the copied workflow logs. Deliberately not created
# here: the copy step below uses the directory's existence to decide whether
# the logs have already been copied.
WF_TEST_RESULTS_WORKFLOW_LOGS_DIR=os.path.join(WF_TEST_RESULTS_DIR, "workflow-logs")
WF_TEST_RESULTS_WORKFLOW_LOGS_DIR

## Response Time Monitoring Constants

Currently supported values:  `BDC`

In [None]:
# Passed to monitor_response_times.py as the --project argument.
PROJECT_TO_MONITOR="BDC"

Currently supported values: `DEV`

In [None]:
# Passed to monitor_response_times.py as the --terra-deployment-tier argument.
# NOTE(review): the accompanying text lists only `DEV` as a supported value,
# but `ALPHA` is set here - confirm that monitor_response_times.py accepts it.
TERRA_DEPLOYMENT_TIER="ALPHA"

In [None]:
# Output directory for the response-time monitoring data, created if missing.
# The directory name embeds the workflow start time, reformatted from
# "YYYY/MM/DD HH:MM:SS" to "YYYYMMDD_HHMMSS".
MONITORING_OUTPUT_DIR=os.path.join(WF_TEST_RESULTS_DIR,
                                   f"monitoring_data_{datetime.strptime(WF_START_TIME, '%Y/%m/%d %H:%M:%S').strftime('%Y%m%d_%H%M%S')}")
! mkdir -p "{MONITORING_OUTPUT_DIR}"
MONITORING_OUTPUT_DIR

## General Constants

In [None]:
# Absolute path of the directory containing the helper scripts invoked by this
# Notebook, resolved relative to the current working directory.
SCRIPTS=Path("./terra-workflow-scale-test-tools/scripts").resolve().as_posix()
SCRIPTS

In [None]:
# Absolute path of the directory containing the graphing notebooks run at the
# end of this Notebook, resolved relative to the current working directory.
NOTEBOOKS=Path("./terra-workflow-scale-test-tools/notebooks").resolve().as_posix()
NOTEBOOKS

# Functions

In [None]:
def start_monitoring_background_process() -> psutil.Process:
    """Launch monitor_response_times.py as a child process.

    The script is run with `python3` and is given the project, Terra
    deployment tier and output directory configured in this Notebook.

    Returns:
        The psutil handle of the started process.
    """
    print("Starting monitoring background process ...")
    command = [
        "python3",
        f"{SCRIPTS}/monitor_response_times.py",
        "--project", PROJECT_TO_MONITOR,
        "--terra-deployment-tier", TERRA_DEPLOYMENT_TIER,
        "--output-dir", MONITORING_OUTPUT_DIR,
    ]
    monitor = psutil.Popen(command)
    print(f"Started {monitor}")
    return monitor

In [None]:
def stop_monitoring_background_process(process: psutil.Process) -> None:
    """Stop the monitoring background process and wait for it to exit.

    The process is first asked to terminate and given 60 seconds to exit.
    If it has not exited by then it is killed, so that a monitor which does
    not respond to the termination request is not left running.

    Args:
        process: Handle returned by start_monitoring_background_process().

    Raises:
        psutil.TimeoutExpired: If the process still has not exited shortly
            after being killed.
    """
    print("Stopping monitoring background process ...")
    process.terminate()
    try:
        process.wait(60)
    except psutil.TimeoutExpired:
        # The termination request was not honored in time; force the exit
        # rather than leaving the monitor running in the background.
        print("Monitoring background process did not exit within 60 seconds; killing it ...")
        process.kill()
        process.wait(10)
    print("Stopped monitoring background process.")

In [None]:
def wait_for_workflow_to_complete() -> None:
    """Placeholder for waiting until the workflow has finished.

    Does not yet check the workflow's status: it simply sleeps for a fixed
    60 seconds and prints messages saying so.
    """
    # TODO Implement this
    nap_seconds = 60
    print(f"Pretending to wait for workflow to complete by sleeping {nap_seconds} seconds ...")
    time.sleep(nap_seconds)
    print("Pretending workflow is complete!")

# Monitor response times during workflow execution

In [None]:
# Run the response-time monitor in the background for the duration of the
# workflow.
if monitor_workflow:
    monitoring_process = start_monitoring_background_process()
    try:
        wait_for_workflow_to_complete()
    finally:
        # Always stop the monitor, even if the wait fails or the cell is
        # interrupted, so the background process is not left running.
        stop_monitoring_background_process(monitoring_process)

# Copy workflow logs from the workspace bucket to the local filesystem

In [None]:
# Copy the workflow logs for this submission from the workspace bucket into
# the local workflow-logs directory, unless that directory already exists.
if copy_workflow_logs:
    workflow_logs_path = Path(WF_TEST_RESULTS_WORKFLOW_LOGS_DIR)
    # The existence of the directory is the only marker that the logs were
    # already copied; remove the directory to force a fresh copy.
    # NOTE(review): the directory is created before the copy runs, so a copy
    # that fails part-way will also be skipped on the next run - confirm this
    # is acceptable.
    if not workflow_logs_path.exists():
        workflow_logs_path.mkdir(parents=True, exist_ok=False)
        # Copy the logs - this can take a long time (tens of minutes to hours)
        # Output of the copy script (stdout and stderr) is written to a log
        # file inside the workflow-logs directory.
        ! "{SCRIPTS}/copy_workflow_logs_to_local_fs.sh" -s "{WS_WF_GS_URI}" -d "{WF_TEST_RESULTS_WORKFLOW_LOGS_DIR}" > "{WF_TEST_RESULTS_WORKFLOW_LOGS_DIR}/copy_workflow_logs_to_local_fs.log" 2>&1
    else:
        print(f"The workflow-logs directory already exists: {WF_TEST_RESULTS_WORKFLOW_LOGS_DIR}")
        print("Skipping copy of the workflow logs.")
else:
    print("Currently configured to skip copying of workflow logs.")


In [None]:
# Run the script that extracts DRS localization timestamps, giving it the
# per-submission results directory.
if extract_timeseries_data:
    # The redirection to a log file at the end of the command follows a `#`,
    # so it appears to be disabled and the script's output is shown here.
    ! "{SCRIPTS}/extract_drs_localization_timestamps.sh" -d "{WF_TEST_RESULTS_DIR}" # > "{WF_TEST_RESULTS_WORKFLOW_LOGS_DIR}/extract_drs_localization_timestamps.log" 2>&1

In [None]:
# Run the script that extracts DRS localization fallback timestamps, giving it
# the per-submission results directory.
if extract_timeseries_data:
    # The redirection to a log file at the end of the command follows a `#`,
    # so it appears to be disabled and the script's output is shown here.
    ! "{SCRIPTS}/extract_drs_localization_fallback_timestamps.sh" -d "{WF_TEST_RESULTS_DIR}"  # > "{WF_TEST_RESULTS_WORKFLOW_LOGS_DIR}/extract_drs_localization_fallback_timestamps.log" 2>&1

# Display the results of the workflow run

## Workflow DRS localization rates

In [None]:
# Execute the notebook that graphs the DRS data access rates.
# NOTE(review): presumably it reads the constants defined in this Notebook
# (e.g. the results directories) - verify against that notebook.
if display_results:
    %run "{NOTEBOOKS}/graph_drs_data_access_rates.ipynb"

## Service/endpoint response times

In [None]:
# Execute the notebook that graphs the response time data.
# NOTE(review): presumably it reads the constants defined in this Notebook
# (e.g. the monitoring output directory) - verify against that notebook.
if display_results:
    %run "{NOTEBOOKS}/graph_response_time_data.ipynb"