# monitor_workflow_and_report_results

Top-level Notebook for monitoring and reporting results of workflow DRS data access scale tests.

# Imports

In [None]:
import os
import sys
import time
from datetime import datetime
from pathlib import Path

import psutil

In [None]:
def find_relative_directory(relative_path_list: list) -> str:
    """Return the first entry of ``relative_path_list`` that is an existing directory.

    Entries are checked in list order with ``os.path.isdir``, so relative
    entries are resolved against the current working directory (which is
    printed for debugging).

    Raises:
        Exception: if none of the entries is an existing directory.
    """
    print(f"CWD: {os.getcwd()}")   # Debugging
    # Lazily scan the candidates and stop at the first one that exists.
    found = next(
        (candidate for candidate in relative_path_list if os.path.isdir(candidate)),
        None,
    )
    if found is None:
        raise Exception(f"Directory in list not found: {relative_path_list}")
    print(f"Found directory: {found}")  # Debugging
    return found

In [None]:
# Locate the scripts directory by trying several locations relative to the
# current working directory; the first one that exists is used.
SCRIPTS = find_relative_directory(["scripts", "terra-workflow-scale-test-tools/scripts", "../scripts"])
# Put the scripts directory first on the module search path so that the
# modules it contains can be imported by the cells that follow.
sys.path.insert(0, SCRIPTS)
print(f"sys.path: {sys.path}")  # Debugging

In [None]:
from user_input import UserInputUI
from workflow_info import WorkflowDAO

In [None]:
def find_notebooks_relative_path() -> str:
    """Return the path of the notebooks directory relative to the current working directory.

    Returns ``"."`` when the working directory path itself ends with
    ``terra-workflow-scale-test-tools/notebooks``; otherwise returns that
    relative path if it exists as a directory below the working directory.

    Raises:
        Exception: if neither condition holds.
    """
    notebooks_subpath = "terra-workflow-scale-test-tools/notebooks"
    working_dir = str(os.getcwd())

    # Already inside the notebooks directory.
    if working_dir.endswith(notebooks_subpath):
        return "."
    # The repository is checked out directly below the working directory.
    if os.path.isdir(notebooks_subpath):
        return notebooks_subpath
    raise Exception(f"Notebooks directory not found relative to {working_dir}")

# Relative path of the notebooks directory; used below to build the paths of
# the other notebooks that are executed with %run.
NOTEBOOKS = find_notebooks_relative_path()

# Optionally Configure for Local/External-to-Terra Development

In [None]:
# Enable external development setup
# TODO Set this to False for running in Terra Jupyter Cloud Environment.
external_development=True
if external_development:
    # Execute the external_development notebook (located in the notebooks
    # directory) from within this notebook.
    external_development_notebook_path = os.path.join(NOTEBOOKS,"external_development.ipynb")
    %run $external_development_notebook_path
    # Show the working directory and PATH in effect after that notebook ran.
    print(f"CWD: {os.getcwd()}")  # Debugging
    print(f"PATH: {os.getenv('PATH')}")  # Debugging

# Manual Input/Configuration

In [None]:
# Create the user-input UI and display it; the cells below read their
# configuration values from this object.
ui = UserInputUI()
ui.display()

Data service (project) to monitor. Currently supported values: `BDC`

In [None]:
# Data service chosen in the UI; passed as --project to the response-time
# monitoring script further below.
PROJECT_TO_MONITOR = ui.get_data_service()
# Bare name as the last line so the notebook displays the value.
PROJECT_TO_MONITOR

Terra deployment tier to use. Currently supported values: `DEV`, `ALPHA`

In [None]:
# Terra deployment tier chosen in the UI; used when creating the WorkflowDAO
# and passed as --terra-deployment-tier to the monitoring script.
TERRA_DEPLOYMENT_TIER=ui.get_terra_deployment_tier()
# Bare name as the last line so the notebook displays the value.
TERRA_DEPLOYMENT_TIER

In [None]:
# Id of the workflow submission to monitor and report on, as entered in the UI.
WF_SUBMISSION_ID = ui.get_submission_id()
# Alternative for development: hard-code a submission id instead of using the UI.
# WF_SUBMISSION_ID="32f74aa9-7779-4d28-aec7-641d26307beb"
# Bare name as the last line so the notebook displays the value.
WF_SUBMISSION_ID

# Get Workflow Details

In [None]:
# Workspace identity is read from the environment. os.getenv returns None
# (rather than raising) when a variable is not set.
WORKSPACE_NAMESPACE = os.getenv('WORKSPACE_NAMESPACE')
WORKSPACE_NAME = os.getenv('WORKSPACE_NAME')

In [None]:
# Data-access object for the selected submission; used below to read the
# submission time and to poll the workflow state while monitoring.
workflow_dao = WorkflowDAO(TERRA_DEPLOYMENT_TIER, WORKSPACE_NAMESPACE, WORKSPACE_NAME, WF_SUBMISSION_ID)
print("This may take a minute or more for large workflows ...")
print(f"\nSubmission Summary:\n{workflow_dao.get_workflow_summary_display_string()}")

In [None]:
# Submission time requested in 'YYYY/MM/DD HH:MM:SS' format; the same format
# string is used below to parse it when naming the monitoring output directory.
WF_START_TIME = workflow_dao.get_submission_time('%Y/%m/%d %H:%M:%S')
# Bare name as the last line so the notebook displays the value.
WF_START_TIME

## Processing Steps to Run

In [None]:
# UI flag: when true, the monitoring background process is run until the
# workflow is no longer in process.
monitor_response_times = ui.is_monitor_response_times()
monitor_response_times

In [None]:
# UI flag: when true, the workflow logs are copied from the workspace bucket
# to the local filesystem.
copy_workflow_logs_for_analysis = ui.is_copy_workflow_logs_for_analysis()
copy_workflow_logs_for_analysis

In [None]:
# UI flag: when true, the DRS localization timestamp extraction scripts are run.
extract_timeseries_data = ui.is_extract_timeseries_data()
extract_timeseries_data

In [None]:
# UI flag: when true, the graphing notebooks at the end of this notebook are run.
display_timeseries_graphs = ui.is_display_timeseries_graphs()
display_timeseries_graphs

# General Constants

In [None]:
# Workspace bucket is read from the environment. Indexing os.environ raises
# KeyError if WORKSPACE_BUCKET is not set.
WORKSPACE_BUCKET=os.environ['WORKSPACE_BUCKET']
WORKSPACE_BUCKET

In [None]:
# Location of this submission within the workspace bucket; used as the source
# when copying the workflow logs.
# NOTE(review): assumes submissions are stored directly under the bucket root - confirm.
WS_WF_GS_URI=f"{WORKSPACE_BUCKET}/{WF_SUBMISSION_ID}"
WS_WF_GS_URI

In [None]:
# Absolute path (POSIX-style string) of the local root directory for test
# results, resolved relative to the current working directory.
TEST_RESULTS_DIR=Path('./test_results').resolve().as_posix()
# Create the directory (and any missing parents) via the shell.
! mkdir -p "{TEST_RESULTS_DIR}"
TEST_RESULTS_DIR

In [None]:
# Per-submission results directory: <TEST_RESULTS_DIR>/submission_<submission id>.
WF_TEST_RESULTS_DIR=os.path.join(TEST_RESULTS_DIR, f"submission_{WF_SUBMISSION_ID}")
# Create the directory (and any missing parents) via the shell.
! mkdir -p "{WF_TEST_RESULTS_DIR}"
WF_TEST_RESULTS_DIR

In [None]:
# Destination for the copied workflow logs. It is deliberately not created
# here: the copy cell below creates it and uses its existence to decide
# whether to copy.
WF_TEST_RESULTS_WORKFLOW_LOGS_DIR=os.path.join(WF_TEST_RESULTS_DIR, "workflow-logs")
WF_TEST_RESULTS_WORKFLOW_LOGS_DIR

In [None]:
# Output directory for the monitoring script, named after the submission time:
# WF_START_TIME is parsed from 'YYYY/MM/DD HH:MM:SS' and re-formatted as
# 'YYYYMMDD_HHMMSS' to give "monitoring_data_<timestamp>".
MONITORING_OUTPUT_DIR=os.path.join(WF_TEST_RESULTS_DIR,
                                   f"monitoring_data_{datetime.strptime(WF_START_TIME, '%Y/%m/%d %H:%M:%S').strftime('%Y%m%d_%H%M%S')}")
# Create the directory (and any missing parents) via the shell.
! mkdir -p "{MONITORING_OUTPUT_DIR}"
MONITORING_OUTPUT_DIR

# Monitor response times during workflow execution

In [None]:
def start_monitoring_background_process() -> psutil.Process:
    """Launch ``monitor_response_times.py`` as a child process and return its handle.

    The script is taken from the ``SCRIPTS`` directory, run with ``python3``,
    and given the project to monitor, the Terra deployment tier and the
    monitoring output directory as command-line options.

    Returns:
        The ``psutil.Popen`` object for the started process.
    """
    print("Starting monitoring background process ...")
    command = [
        "python3", f"{SCRIPTS}/monitor_response_times.py",
        "--project", PROJECT_TO_MONITOR,
        "--terra-deployment-tier", TERRA_DEPLOYMENT_TIER,
        "--output-dir", MONITORING_OUTPUT_DIR,
    ]
    monitor = psutil.Popen(command)
    print(f"Started {monitor}")
    return monitor

In [None]:
def wait_for_workflow_to_complete() -> None:
    """Block until the monitored workflow submission is no longer in process.

    Polls ``workflow_dao`` in a loop: while it reports the workflow as in
    process, print the current status, sleep for ``sleep_seconds`` seconds and
    refresh the DAO state with ``update()``.
    """
    sleep_seconds = 10
    while workflow_dao.is_in_process():
        # Call the accessor: interpolating the attribute without parentheses
        # prints the bound-method repr instead of the status.
        # NOTE(review): assumes get_workflow_status is a regular method, like
        # the other WorkflowDAO accessors used in this notebook - confirm.
        print(f"Workflow status: {workflow_dao.get_workflow_status()}")
        # Flush so this partial line is visible before the sleep starts.
        print(f"Sleeping {sleep_seconds} seconds ...", end="", flush=True)
        time.sleep(sleep_seconds)
        print("awake now!")
        workflow_dao.update()

In [None]:
def stop_monitoring_background_process(process: psutil.Process) -> None:
    """Stop the monitoring background process and wait for it to exit.

    Asks the process to terminate and waits up to 60 seconds for it to exit.
    If it is still running after that, it is killed so that it cannot be left
    running after this function returns.

    Args:
        process: Handle of the process to stop, as returned by
            ``start_monitoring_background_process``.

    Raises:
        psutil.TimeoutExpired: if the process is still running 60 seconds
            after being killed.
    """
    wait_seconds = 60
    print("Stopping monitoring background process ...")
    process.terminate()
    try:
        process.wait(wait_seconds)
    except psutil.TimeoutExpired:
        # The process did not react to the termination request in time;
        # force it to stop rather than leaving it running in the background.
        print(f"Monitoring background process did not exit within {wait_seconds} seconds; killing it ...")
        process.kill()
        process.wait(wait_seconds)
    print("Stopped monitoring background process.")

In [None]:
# Run the response-time monitor in the background for as long as the workflow
# is in process.
if monitor_response_times:
    monitoring_process = start_monitoring_background_process()
    try:
        wait_for_workflow_to_complete()
    finally:
        # Always stop the background process, even when waiting fails or the
        # cell is interrupted; otherwise it would keep running unattended.
        stop_monitoring_background_process(monitoring_process)

# Copy workflow logs from the workspace bucket to the local filesystem

In [None]:
if copy_workflow_logs_for_analysis:
    workflow_logs_path = Path(WF_TEST_RESULTS_WORKFLOW_LOGS_DIR)
    # The existence of the destination directory is the only signal used to
    # decide whether the logs still need to be copied.
    # NOTE(review): the directory is created before the copy starts, so an
    # interrupted or failed copy is also skipped on the next run - confirm
    # this is acceptable.
    if not workflow_logs_path.exists():
        workflow_logs_path.mkdir(parents=True, exist_ok=False)
        # Copy the logs - this can take a long time (tens of minutes to hours)
        # The script's stdout and stderr are redirected to a log file inside
        # the destination directory.
        ! "{SCRIPTS}/copy_workflow_logs_to_local_fs.sh" -s "{WS_WF_GS_URI}" -d "{WF_TEST_RESULTS_WORKFLOW_LOGS_DIR}" > "{WF_TEST_RESULTS_WORKFLOW_LOGS_DIR}/copy_workflow_logs_to_local_fs.log" 2>&1
    else:
        print(f"The workflow-logs directory already exists: {WF_TEST_RESULTS_WORKFLOW_LOGS_DIR}")
        print("Skipping copy of the workflow logs.")
else:
    print("Currently configured to skip copying of workflow logs.")


In [None]:
if extract_timeseries_data:
    # Run the DRS localization timestamp extraction script on this
    # submission's test results directory. The redirection to a log file at
    # the end of the command is currently disabled: it follows a shell
    # comment character (#).
    ! "{SCRIPTS}/extract_drs_localization_timestamps.sh" -d "{WF_TEST_RESULTS_DIR}" # > "{WF_TEST_RESULTS_WORKFLOW_LOGS_DIR}/extract_drs_localization_timestamps.log" 2>&1

In [None]:
if extract_timeseries_data:
    # Run the DRS localization fallback timestamp extraction script on this
    # submission's test results directory. The redirection to a log file at
    # the end of the command is currently disabled: it follows a shell
    # comment character (#).
    ! "{SCRIPTS}/extract_drs_localization_fallback_timestamps.sh" -d "{WF_TEST_RESULTS_DIR}"  # > "{WF_TEST_RESULTS_WORKFLOW_LOGS_DIR}/extract_drs_localization_fallback_timestamps.log" 2>&1

# Display the results of the workflow run

## Workflow DRS localization rates

In [None]:
if display_timeseries_graphs:
    # Execute the DRS data access rate graphing notebook from the notebooks directory.
    %run "{NOTEBOOKS}/graph_drs_data_access_rates.ipynb"

## Service/endpoint response times

In [None]:
if display_timeseries_graphs:
    # Execute the response time graphing notebook from the notebooks directory.
    %run "{NOTEBOOKS}/graph_response_time_data.ipynb"