In [2]:
# Python Standard Library
import datetime
import os, sys
import re
import shlex

# Third-Party Packages
from IPython.core.display import display, HTML
from IPython.display import display_html
# PG: Not sure if I like importing matplotlib so often, could probably be
# simpler..
from matplotlib import cm
from matplotlib import pyplot as plt
from matplotlib.patches import Circle, Wedge, Rectangle

import matplotlib
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import paramiko

In [3]:
from esm_viz.esm_viz import read_simulation_config

In [4]:
# Load the monitoring configuration from the example YAML file; the path is
# relative to the directory the notebook is run from.
config = read_simulation_config("../../example.yaml")

In [5]:
config

{'user': 'a270077',
 'host': 'mistral.dkrz.de',
 'basedir': '/work/ba0989/a270077/AWICM_PISM/LGM_011',
 'model': 'AWICM',
 'storagedir': '/scratch/work/pgierz/',
 'use_hvplot': True,
 'general': ['queue info',
  'run efficiency',
  'disk usage',
  'simulation timeline',
  'progress bar',
  'newest log'],
 'echam': {'Global Timeseries': {'temp2': {'file pattern': '${EXP_ID}_echam6_echam_??????.grb',
    'plot arguments': {'linewidth': 1, 'color': 'black'}},
   'albedo': {'file pattern': '${EXP_ID}_echam6_echam_??????.grb'},
   'aprl': {'file pattern': '${EXP_ID}_echam6_echam_??????.grb'},
   'aprc': {'file pattern': '${EXP_ID}_echam6_echam_??????.grb'}},
  'Global Climatology': {'temp2': {'file pattern': '${EXP_ID}_echam6_echam_??????.grb',
    'plot arguments': {'cmap': 'cmocean.thermal'}},
   'albedo': {'file pattern': '${EXP_ID}_echam6_echam_??????.grb',
    'plot arguments': {'cmap': 'cmocean.ice'}},
   'aprl': {'file pattern': '${EXP_ID}_echam6_echam_??????.grb',
    'plot argument

In [6]:
def get_log_output(config, esm_style=True):
    """
    Fetch the experiment log file from the remote host and return its lines.

    Parameters
    ----------
    config : dict
        Simulation configuration. The keys ``"basedir"``, ``"host"`` and
        ``"user"`` are always read; ``"model"`` is read when ``esm_style``
        is true.
    esm_style : bool, optional
        If true (the default) the log is expected at
        ``<basedir>/scripts/<expid>_<model>_compute.log`` with the model name
        lower-cased; otherwise at ``<basedir>/scripts/<expid>.log``.
        ``<expid>`` is the last path component of ``basedir``.

    Returns
    -------
    list
        The lines of the remote log file, as produced by ``cat``.

    Notes
    -----
    No credential is stored in the source. A password is taken from the
    environment variable ``ESM_VIZ_SSH_PASSWORD`` if it is set; otherwise
    ``None`` is handed to paramiko so that it authenticates with the SSH agent
    or the user's key files. The password that used to be hard-coded in this
    function should be treated as compromised and rotated.
    """
    # Strip trailing slashes so the experiment id is never an empty string.
    exp_path = config["basedir"].rstrip("/")
    expid = exp_path.split("/")[-1]
    if esm_style:
        log_name = expid + "_" + config["model"].lower() + "_compute.log"
    else:
        log_name = expid + ".log"
    log_file = exp_path + "/scripts/" + log_name

    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without asking,
    # which leaves the first connection open to man-in-the-middle attacks.
    # Kept to preserve existing behaviour; consider paramiko.RejectPolicy.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(
            config["host"],
            username=config["user"],
            password=os.environ.get("ESM_VIZ_SSH_PASSWORD"),
        )
        # Quote the path: it is built from configuration values and is
        # interpreted by a shell on the remote side.
        _stdin, stdout, _stderr = ssh.exec_command("cat " + shlex.quote(log_file))
        # Read everything before the connection is torn down in ``finally``.
        return stdout.readlines()
    finally:
        ssh.close()

In [7]:
def generate_dataframe_from_esm_logfile(log):
    """
    Turn the lines of an ESM-style log into a DataFrame indexed by timestamp.

    Each non-blank line is split once at the first ``" : "`` into a date part
    and a message part; the message is then split on whitespace into run
    number, experiment date, job id, a separator token and the state. The
    first line is discarded because it announces the start of the experiment
    rather than describing a run.

    Parameters
    ----------
    log : iterable of str
        Raw log lines, e.g. the return value of ``get_log_output``.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Run Number"``, ``"Exp Date"``, ``"Job ID"`` and ``"State"``
        (all still strings), indexed by a ``DatetimeIndex`` named ``"Date"``.
    """
    # maxsplit=1 keeps a message that itself contains " : " in one piece, and
    # blank lines (such as a trailing newline) are skipped so they cannot turn
    # into rows with missing fields.
    records = [line.split(" : ", 1) for line in log if line.strip()]
    raw = pd.DataFrame(records, columns=["Date", "Message"])
    fields = raw["Message"].str.split(expand=True)
    # We drop the first row since it says "Start of Experiment"
    log_df = pd.concat([raw["Date"].iloc[1:], fields.iloc[1:]], axis=1)
    log_df.columns = ["Date", "Run Number", "Exp Date", "Job ID", "Separator", "State"]
    log_df = log_df.drop(columns="Separator").set_index("Date")
    log_df.index = pd.to_datetime(log_df.index)
    return log_df

In [8]:
def simulation_timeline(config):
    """
    Draw a single-row timeline of the most recent runs found in the log.

    The log is fetched with ``get_log_output`` and parsed with
    ``generate_dataframe_from_esm_logfile``. From the last 30 entries one
    horizontal bar is drawn per run number, spanning the first two timestamps
    logged for that run. If any run has fewer than two entries a message is
    printed, the figure is closed and nothing is drawn.

    Parameters
    ----------
    config : dict
        Simulation configuration, passed on to ``get_log_output``.

    Returns
    -------
    None
    """
    log_df = generate_dataframe_from_esm_logfile(get_log_output(config))

    # A trailing "start" entry has no matching end yet, so it is left out
    # before the last 30 entries are selected.
    finished_entries = log_df.iloc[:-1] if "start" in log_df.iloc[-1]["State"] else log_df
    run_groups = finished_entries.tail(30).groupby("Run Number")

    fig, ax = plt.subplots(1, 1, dpi=150, figsize=(15, 1.5))
    # Bar colour is meant to match the progress bar shown elsewhere.
    bar_color = (217.0 / 255.0, 83.0 / 255.0, 79.0 / 255.0)
    for _run_number, entries in run_groups:
        stamps = entries.index
        if len(stamps) < 2:
            # Without two timestamps there is no interval to draw.
            print("Sorry, couldn't make a timeline")
            plt.close(fig)
            return
        begin = mdates.date2num(stamps[0])
        end = mdates.date2num(stamps[1])
        ax.barh(
            0,
            end - begin,
            left=begin,
            height=0.2,
            color=bar_color,
            edgecolor="black",
        )

    # Keep only the bottom (time) axis visible.
    ax.set_ylim(-0.5, 0.5)
    for side in ("top", "left", "right"):
        ax.spines[side].set_visible(False)
    ax.yaxis.set_visible(False)
    ax.xaxis_date()
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M %d.%m.%y"))

In [9]:
# Fetch the raw log lines from the remote host (this opens an SSH connection).
log = get_log_output(config)

In [10]:
# Parse the raw log lines into a DataFrame indexed by timestamp.
log_df = generate_dataframe_from_esm_logfile(log)



In [11]:
import hvplot.pandas

In [71]:
# Look at the last 30 log entries, leaving out a trailing "start" entry
# (a run that has begun but not yet finished), and group them by run.
end_of_log = (
    log_df.iloc[:-1] if "start" in log_df.iloc[-1]["State"] else log_df
).tail(30)
end_groups = end_of_log.groupby("Run Number")

In [155]:
# Condense each run's log entries into one record: when it started, when it
# finished, and the experiment date / job id logged with its "start" entry.
run_records = []
for run_number, entries in end_groups:
    started = entries[entries["State"] == "start"]
    finished = entries[entries["State"] == "done"]
    run_records.append(
        {
            "Run Number": run_number,
            "Start Date": started.index[0],
            "End Date": finished.index[0],
            "Exp Date": started["Exp Date"].values[-1],
            "Job ID": started["Job ID"].values[-1],
        }
    )
# The run number is used as the row label and also kept as a column.
new_df = pd.DataFrame(
    run_records,
    columns=["Run Number", "Start Date", "End Date", "Exp Date", "Job ID"],
    index=[record["Run Number"] for record in run_records],
)

In [156]:
# Display the per-run table. new_df is already a DataFrame at this point, so
# the pd.DataFrame(...) wrapper is only there for the rendered output.
pd.DataFrame(new_df)

Unnamed: 0,Run Number,Start Date,End Date,Exp Date,Job ID
1,1,2019-07-29 15:21:49,2019-07-29 17:43:42,18500101,17079573
10,10,2019-07-30 08:14:39,2019-07-30 09:36:29,18590101,17085042
11,11,2019-07-30 09:37:49,2019-07-30 11:00:15,18600101,17085448
12,12,2019-07-30 11:00:56,2019-07-30 12:23:14,18610101,17088704
13,13,2019-07-30 12:32:28,2019-07-30 13:53:19,18620101,17089269
14,14,2019-07-30 13:56:19,2019-07-30 15:22:01,18630101,17090823
2,2,2019-07-29 17:45:27,2019-07-29 19:07:35,18510101,17081241
3,3,2019-07-29 19:08:59,2019-07-29 20:31:32,18520101,17081661
4,4,2019-07-29 20:47:17,2019-07-30 01:05:05,18530101,17082956
5,5,2019-07-30 01:05:37,2019-07-30 02:27:33,18540101,17083335


In [162]:
# Cast the identifiers parsed from the log text to integers, order the runs
# numerically (as strings they sort as 1, 10, 11, ..., 2), and add each run's
# wall-clock duration in minutes.
# The run numbers are read from the "Run Number" column itself rather than
# from the row labels, and a new frame is bound to new_df instead of mutating
# the old one in place, so re-running this cell gives the same result.
new_df = (
    new_df.astype({"Run Number": "int", "Job ID": "int"})
    .sort_values("Run Number")
    .assign(
        Duration=lambda runs: (
            runs["End Date"] - runs["Start Date"]
        ).dt.total_seconds() / 60.0
    )
)

In [163]:
new_df

Unnamed: 0,Run Number,Start Date,End Date,Exp Date,Job ID,Duration
1,1,2019-07-29 15:21:49,2019-07-29 17:43:42,18500101,17079573,141.883333
2,2,2019-07-29 17:45:27,2019-07-29 19:07:35,18510101,17081241,82.133333
3,3,2019-07-29 19:08:59,2019-07-29 20:31:32,18520101,17081661,82.55
4,4,2019-07-29 20:47:17,2019-07-30 01:05:05,18530101,17082956,257.8
5,5,2019-07-30 01:05:37,2019-07-30 02:27:33,18540101,17083335,81.933333
6,6,2019-07-30 02:28:29,2019-07-30 03:50:13,18550101,17083737,81.733333
7,7,2019-07-30 03:58:14,2019-07-30 05:20:31,18560101,17084165,82.283333
8,8,2019-07-30 05:21:05,2019-07-30 06:43:01,18570101,17084429,81.933333
9,9,2019-07-30 06:43:24,2019-07-30 08:03:39,18580101,17084663,80.25
10,10,2019-07-30 08:14:39,2019-07-30 09:36:29,18590101,17085042,81.833333


In [164]:
# Bar chart of the per-run table through the hvplot accessor (available once
# hvplot.pandas has been imported).
# NOTE(review): bar_width is given a column name here; confirm that hvplot
# accepts this and that it actually scales the bars by run duration.
new_df.hvplot.bar(x="Start Date", y="Run Number", bar_width='Duration')



In [169]:
# Keep only the last rows of the per-run table (tail() with its default row
# count), i.e. the most recent runs after sorting by run number.
end_of_logs = new_df.tail()

In [177]:
end_of_logs

Unnamed: 0,Run Number,Start Date,End Date,Exp Date,Job ID,Duration
10,10,2019-07-30 08:14:39,2019-07-30 09:36:29,18590101,17085042,81.833333
11,11,2019-07-30 09:37:49,2019-07-30 11:00:15,18600101,17085448,82.433333
12,12,2019-07-30 11:00:56,2019-07-30 12:23:14,18610101,17088704,82.3
13,13,2019-07-30 12:32:28,2019-07-30 13:53:19,18620101,17089269,80.85
14,14,2019-07-30 13:56:19,2019-07-30 15:22:01,18630101,17090823,85.7


In [228]:
# Step plot of the duration of every run against its run number; the listed
# hover columns are passed so they show up for each point.
o = new_df.hvplot.step(
    x="Run Number",
    y="Duration",
    where="post",
    hover_cols=["Start Date", "End Date", "Exp Date", "Job ID"],
    color="black",
    grid=True,
    width=1500,
    height=400,
    xlabel="Run Number",
    ylabel="Duration (minutes)",
    ylim=(0, 500),
    title="Simulation Progress",
)

In [230]:
o