# Phase-5-Diagnosing-Runtime-and-Resource-Usage.ipynb

This notebook obtains the runtimes and resource usage of each BEAST 2 run performed by the Optimising-BEAST-Runs.ipynb workflow.

## Instructions

Once all the BEAST runs have completed, simply run all cells. **Note:** if the runs were submitted with slurm's `sbatch`, you can check their progress via `squeue --me`.


In [None]:
import os
import pandas as pd
from beast_pype.diagnostics.runtime import get_beast_runtimes,   get_slurm_job_stats
import seaborn as sns
from beast_pype.outputs import summary_stats_and_plot

Get all the configurations run.

In [None]:
# Every sub-directory of the current working directory is treated as one
# configuration, except Jupyter's own checkpoint folder.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the order of configurations in tables and plots reproducible.
configurations = sorted(
    entry for entry in os.listdir()
    if os.path.isdir(entry) and entry != '.ipynb_checkpoints'
)

### Collate all the data

In [None]:
# Gather the per-run BEAST runtimes (and, where a slurm job-id file exists, the
# slurm job statistics) of every configuration, then write the combined tables
# to CSV in the current working directory.
if not configurations:
    # pd.concat([]) would otherwise fail with an opaque
    # "No objects to concatenate" ValueError further down.
    raise ValueError(
        'No configuration directories were found in the current working directory.')

runtime_frames = []
slurm_frames = []
for configuration in configurations:
    beast_outputs_path = f'{configuration}/beast_outputs'
    beast_runtime = get_beast_runtimes(beast_outputs_path,
                                       outfile_startswith='run-with-seed-',
                                       outfile_endswith='.out')
    beast_runtime['Configuration'] = configuration
    runtime_frames.append(beast_runtime)

    # Presumably this file is only written when the runs were submitted via
    # sbatch -- TODO confirm against the workflow that creates it.
    slurm_job_ids_path = f'{beast_outputs_path}/slurm_job_ids.txt'
    if os.path.isfile(slurm_job_ids_path):
        jobs_df = pd.read_csv(slurm_job_ids_path, sep=';')
        # Cast to str before merging; assumes get_slurm_job_stats returns
        # JobID as strings -- TODO confirm.
        jobs_df['JobID'] = jobs_df['JobID'].astype(str)
        stats_df = get_slurm_job_stats(jobs_df['JobID'].to_list())
        # Default (inner) merge: jobs without a matching stats row are dropped.
        job_stats_df = jobs_df.merge(stats_df, on='JobID')
        job_stats_df['Configuration'] = configuration
        slurm_frames.append(job_stats_df)

beast_runtimes = pd.concat(runtime_frames)
beast_runtimes.to_csv('BEAST_runtimes.csv', index=False)

# Later cells test `isinstance(slurm_job_stats, list)` to detect that no slurm
# data was collected, so the name must stay bound to a list in that case.
slurm_job_stats = []
if slurm_frames:
    slurm_job_stats = pd.concat(slurm_frames)
    slurm_job_stats.to_csv('BEAST_slurm_stats.csv', index=False)

## Display raw runtime stats

### Runtimes:

In [None]:
# Show the combined per-run runtime table as this cell's output.
beast_runtimes

#### Slurm Stats if beast was run using `sbatch`

In [None]:
# `slurm_job_stats` is still a list when no slurm job statistics were collected;
# in that case there is nothing to show.
slurm_stats_collected = not isinstance(slurm_job_stats, list)
if slurm_stats_collected:
    display(slurm_job_stats)

## Figures and Summary Stats by Configuration
### Beast Runtimes

In [None]:
# Descriptive statistics of the runtime table per configuration, transposed so
# that each configuration becomes a column.
runtimes_by_configuration = beast_runtimes.groupby('Configuration')
beast_runtimes_summary = runtimes_by_configuration.describe().T

#### Seconds
Box Violin plot

In [None]:
# Plot the BEAST runtimes per configuration and show the accompanying summary
# table. `convert_plot_to_seconds=True` presumably plots the D-H-M-S durations
# in seconds -- confirm against summary_stats_and_plot.
run_time_outputs = summary_stats_and_plot(
    beast_runtimes,
    y='run_time_D_H_M_S',
    x='Configuration',
    convert_plot_to_seconds=True,
)
run_time_plot, run_time_df, run_time_summary = run_time_outputs
display(run_time_plot, run_time_summary)


Summary Stats: the summary statistics table is displayed beneath the plot in the cell above.

### Slurm Stats if beast was run using `sbatch`

#### Elapsed time

In [None]:
# Only plot when slurm job statistics were collected (otherwise the variable is
# still a list).
if not isinstance(slurm_job_stats, list):
    elapsed_options = dict(x='Configuration', y='Elapsed', convert_plot_to_seconds=True)
    fig, y_df, summary_stats = summary_stats_and_plot(slurm_job_stats, **elapsed_options)
    display(fig, summary_stats)


#### Timelimit Used %

In [None]:
# Share of the requested time limit used, per configuration; skipped when no
# slurm job statistics were collected.
if not isinstance(slurm_job_stats, list):
    timelimit_options = dict(x='Configuration', y='Timelimit Used %')
    fig, y_df, summary_stats = summary_stats_and_plot(slurm_job_stats, **timelimit_options)
    display(fig, summary_stats)

#### TotalCPU

In [None]:
# TotalCPU per configuration; skipped when no slurm job statistics were
# collected.
if not isinstance(slurm_job_stats, list):
    total_cpu_options = dict(x='Configuration', y='TotalCPU', convert_plot_to_seconds=True)
    fig, y_df, summary_stats = summary_stats_and_plot(slurm_job_stats, **total_cpu_options)
    display(fig, summary_stats)
    

#### CPUTime

In [None]:
# CPUTime per configuration; skipped when no slurm job statistics were
# collected.
if not isinstance(slurm_job_stats, list):
    cpu_time_options = dict(x='Configuration', y='CPUTime', convert_plot_to_seconds=True)
    fig, y_df, summary_stats = summary_stats_and_plot(slurm_job_stats, **cpu_time_options)
    display(fig, summary_stats)

#### CPU Efficiency (%)

In [None]:
# CPU efficiency per configuration; skipped when no slurm job statistics were
# collected.
if not isinstance(slurm_job_stats, list):
    cpu_efficiency_options = dict(x='Configuration', y='CPU Efficiency (%)')
    fig, y_df, summary_stats = summary_stats_and_plot(slurm_job_stats, **cpu_efficiency_options)
    display(fig, summary_stats)

#### Max RAM Used (GB)

In [None]:
# Peak RAM usage per configuration; skipped when no slurm job statistics were
# collected.
if not isinstance(slurm_job_stats, list):
    max_ram_options = dict(x='Configuration', y='Max RAM Used (GB)')
    fig, y_df, summary_stats = summary_stats_and_plot(slurm_job_stats, **max_ram_options)
    display(fig, summary_stats)

#### RAM Efficiency (%)

In [None]:
# RAM efficiency per configuration; skipped when no slurm job statistics were
# collected.
if not isinstance(slurm_job_stats, list):
    ram_efficiency_options = dict(x='Configuration', y='RAM Efficiency (%)')
    fig, y_df, summary_stats = summary_stats_and_plot(slurm_job_stats, **ram_efficiency_options)
    display(fig, summary_stats)