In [1]:
import tensorflow as tf
# getting data directly from a tensorboard log dir
from tensorflow.python.summary import event_multiplexer

import pandas as pd

from collections import OrderedDict

# from matplotlib import pyplot as plt
# from matplotlib.colors import Normalize
# from numpy import around

import os

In [2]:
"""
Scalar tags (most of them, not all):
Mean_Reciprocal_Rank/Mean_Reciprocal_Rank_test
Accuracy/Accuracy_test
In_top_5/In_top_5_test
Cost_function/Total_cost_test
Cost_function_additional_metrics/Cross_entropy_test

Attributes of each ScalarEvent:
wall_time
step
value
"""


def get_optimal_event(scalars, selection_func):
    """
    Return the event at which a metric reached its optimal value.

    Inputs:
        scalars: iterable of ScalarEvent-like objects exposing a 'value' attribute.
        selection_func: the builtin min or max.
            e.g. for accuracy, max should be selected, while for cost, min is appropriate.
    Returns the "optimal" event, i.e. the event where the optimal value was achieved.
        If several events share the optimal value, the first one is returned.
        Events whose value is NaN are never selected.
    Raises ValueError if selection_func is neither min nor max, 
        or if there is no event with a non-NaN value to choose from.
    """
    if selection_func not in (min, max):
        # not every object carries __name__ (e.g. the string 'min'),
        # so fall back to its repr instead of failing with AttributeError
        raise ValueError('selection_func should be either min or max, got unexpected {}'
                         .format(getattr(selection_func, '__name__', 
                                         repr(selection_func))))
    
    # NaN is the only value that differs from itself; dropping it keeps the 
    # result independent of where a NaN happens to sit in the list
    comparable = [event for event in scalars 
                  if event.value == event.value]
    if not comparable:
        raise ValueError('no event with a comparable value to select from')
    
    # min/max with a key return the first item that attains the optimum
    return selection_func(comparable, key=lambda event: event.value)


def get_all_optimals(log_dir, event_acc):
    """
    Read the three test-set metrics of one run and locate the optimum of each.

    Inputs are log_dir, the run to query (e.g. an entry of child_dir), 
        and event_acc, the event accumulator that is asked for the scalars.
    The optimum (found with 'get_optimal_event') is the minimum for 
        total cost and the maximum for Mean Reciprocal Rank and accuracy.
    Returns a 6-tuple: the three optimal events 
        (optimal_cost, optimal_mrr, optimal_accuracy) followed by 
        the three ScalarEvent lists (total_cost, mrr, accuracy).
    """
    # scalar tags, in the order cost, MRR, accuracy
    tags = ('Cost_function/Total_cost_test', 
            'Mean_Reciprocal_Rank/Mean_Reciprocal_Rank_test', 
            'Accuracy/Accuracy_test')
    total_cost, mrr, accuracy = [event_acc.Scalars(log_dir, tag) 
                                 for tag in tags]

    # pair every ScalarEvent list with the direction in which it improves
    optimal_cost, optimal_mrr, optimal_accuracy = [
        get_optimal_event(events, pick) 
        for events, pick in ((total_cost, min), 
                             (mrr, max), 
                             (accuracy, max))]

    return (optimal_cost, optimal_mrr, optimal_accuracy, 
            total_cost, mrr, accuracy)


def value_at_other_optimal(scalars, optimal_event):
    """
    Inputs are scalars, a list of ScalarEvent and 
        optimal_event, an optimal ScalarEvent.
    Returns the value from scalars at the optimal_event step
        (the first one, if several events share that step).
    e.g. value_at_other_optimal(accuracy, optimal_cost) returns
        the accuracy value at the step where cost was optimal.
    Raises IndexError if scalars holds no event at the optimal_event step.
    """
    wanted_step = optimal_event.step
    for event in scalars:
        if event.step == wanted_step:
            # the first match is the answer, no need to scan the rest
            return event.value
    # same exception type as indexing an empty match list, but it names the step
    raise IndexError('no event recorded at step {}'.format(wanted_step))


def metrics_dict_from_log(log_dir, 
                          event_acc=None, 
                          optimal_cost=None, 
                          optimal_mrr=None, 
                          optimal_accuracy=None, 
                          total_cost=None, 
                          mrr=None, 
                          accuracy=None):
    """
    Build one row of evaluation metrics for a single Tensorboard run.

    Inputs:
        log_dir: the run to read; it is also stored as 'Model_str'.
        event_acc: the event accumulator. If None and log data is needed, 
            a new multiplexer is created from log_dir and reloaded.
        optimal_cost, optimal_mrr, optimal_accuracy: optional optimal ScalarEvents.
        total_cost, mrr, accuracy: optional ScalarEvent lists.
        Every optional input left as None is filled in from the log 
            (using 'get_all_optimals'); supplied ones are used as given.
    Returns an OrderedDict with the model string (log dir name) and, for each of 
        the three optima (cost, MRR, accuracy), the cost, MRR and accuracy 
        values at that optimum's step plus the step itself.
    """
    supplied = (optimal_cost, optimal_mrr, optimal_accuracy, 
                total_cost, mrr, accuracy)
    
    # only touch the log when at least one value is actually missing
    if any(item is None for item in supplied):
        if event_acc is None:
            # NOTE(review): the scalars are then looked up under the run name 
            # log_dir - confirm this matches the name the multiplexer registers
            event_acc = event_multiplexer\
                .EventMultiplexer()\
                .AddRunsFromDirectory(log_dir)
            event_acc.Reload()
        
        # get evaluation metrics data from log dir and use it 
        # for exactly those inputs that were not provided
        from_log = get_all_optimals(log_dir, event_acc)
        (optimal_cost, optimal_mrr, optimal_accuracy, 
         total_cost, mrr, accuracy) = [
            given if given is not None else loaded 
            for given, loaded in zip(supplied, from_log)]
    
    metrics = OrderedDict([('Model_str', log_dir)])
    # same column layout for each optimum: cost, MRR, accuracy, step
    for label, optimum in (('cost', optimal_cost), 
                           ('MRR', optimal_mrr), 
                           ('accuracy', optimal_accuracy)):
        metrics['Cost @ optimal ' + label] = value_at_other_optimal(total_cost, optimum)
        metrics['MRR @ optimal ' + label] = value_at_other_optimal(mrr, optimum)
        metrics['Accuracy @ optimal ' + label] = value_at_other_optimal(accuracy, optimum)
        metrics['step @ optimal ' + label] = optimum.step
    
    return metrics

In [3]:
# parent log dirs to read the Tensorboard runs from
log_parent_dirs = [
    './experiments/logdir_exper_4_20_GRU/',
    './experiments/logdir_exper_4_20_GRU_bidir/',
    './experiments/logdir_exper_4_20_LSTM/',
    './experiments/logdir_exper_4_20_LSTM_bidir/',
]

# only the first three parent dirs are kept; the printed message says why
print('cutting short the number of log dirs (else crash)')
del log_parent_dirs[3:]

cutting short the number of log dirs (else crash)


In [4]:
# a single multiplexer collects the runs found under every parent log dir
event_accum = event_multiplexer.EventMultiplexer()
for log_dir in log_parent_dirs:
    # the name is rebound to whatever AddRunsFromDirectory returns, 
    # so the same variable keeps referring to the accumulator in use
    event_accum = event_accum.AddRunsFromDirectory(log_dir)

# load the events of all registered runs
event_accum.Reload()  # this might take a bit, depending on number of runs

# event_accum = {index: event_multiplexer\
#                .EventMultiplexer()\
#                .AddRunsFromDirectory(log_dir)\
#                .Reload()
#                for index, log_dir in enumerate(log_parent_dirs)}

INFO:tensorflow:Event Multiplexer initializing.
INFO:tensorflow:Event Multiplexer done initializing
INFO:tensorflow:Starting AddRunsFromDirectory: ./experiments/logdir_exper_4_20_GRU/
INFO:tensorflow:Adding events from directory ./experiments/logdir_exper_4_20_GRU/GRU,bidir=F,noisy_tanh,learn_p=T,noise_alpha=1.15,noise_half_normal=F,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=128,l2_wieght_reg=1.0E-03,target_rep_weight=0.3
INFO:tensorflow:Constructing EventAccumulator for ./experiments/logdir_exper_4_20_GRU/GRU,bidir=F,noisy_tanh,learn_p=T,noise_alpha=1.15,noise_half_normal=F,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=128,l2_wieght_reg=1.0E-03,target_rep_weight=0.3
INFO:tensorflow:Adding events from directory ./experiments/logdir_exper_4_20_GRU/GRU,bidir=F,noisy_tanh,learn_p=T,noise_alpha=0.9,noise_half_normal=F,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=64,l2_wieght_reg=1.0E-0

<tensorflow.python.summary.event_multiplexer.EventMultiplexer at 0x7f37be665e48>

In [5]:
# framed one-line summary of how many runs the multiplexer holds
separator = '=' * 50
for text in (separator, 
             'Done loading {} Tensorboard runs'.format(len(event_accum.Runs())), 
             separator):
    print(text)

Done loading 108 Tensorboard runs


In [6]:
# gather the names of the immediate subfolders of every parent log dir
child_dir = []
for log_dir in log_parent_dirs:
    # the first os.walk entry describes log_dir itself; item [1] is its subfolder names
    child_dir.extend(next(os.walk(log_dir))[1])

In [7]:
# one metrics row per subfolder, all read through the shared accumulator
parent_dir_metrics = list(map(
    lambda run_name: metrics_dict_from_log(run_name, event_accum), 
    child_dir))

In [8]:
def _model_type_background(row):
    """
    Return one CSS string per cell of row: string cells are coloured by the 
    model type they mention, every other cell gets no extra style.
    """
    # checked in this order; the first marker found in the cell wins
    palette = [('GRU,bidir=F', 'background: greenyellow'), 
               ('GRU,bidir=T', 'background: hotpink'), 
               ('LSTM,bidir=F', 'background: coral'), 
               ('LSTM,bidir=T', 'background: olive')]
    styles = []
    for cell in row:
        css = ''
        if isinstance(cell, str):
            for marker, background in palette:
                if marker in cell:
                    css = background
                    break
        styles.append(css)
    return styles


# one row per run, best MRR first
# (to rank by cost instead: by='Cost @ optimal cost', ascending=True)
exper_metrics_sorted = pd.DataFrame(parent_dir_metrics)\
    .sort_values(by='MRR @ optimal MRR', ascending=False)

# one display format per metric column: costs as plain floats, 
# MRR and accuracy as percentages; 'Model_str' and the step columns keep the default
value_formats = {}
for col in exper_metrics_sorted.columns:
    if 'Cost @' in col:
        value_formats[col] = '{:.3f}'
    elif 'Accuracy @' in col or 'MRR @' in col:
        value_formats[col] = '{:.2%}'

# NOTE: all formats go through a single .format() call on purpose - on newer 
# pandas a later dict-based .format() resets the columns missing from its dict
exper_metrics = exper_metrics_sorted\
    .style.background_gradient(
    cmap='spring', low=.5, high=0)\
    .format(value_formats)\
    .apply(_model_type_background, axis=1)
    
df_style = exper_metrics.export()
# can reuse styles with
# Styler.use(exper_metrics.export())

In [9]:
exper_metrics
# GRU seems to be the highest performer
# GRU bidir is not far behind
# LSTM comes slightly after
# LSTM bidir seems to be the clear underperformer

Unnamed: 0,Model_str,Cost @ optimal cost,MRR @ optimal cost,Accuracy @ optimal cost,step @ optimal cost,Cost @ optimal MRR,MRR @ optimal MRR,Accuracy @ optimal MRR,step @ optimal MRR,Cost @ optimal accuracy,MRR @ optimal accuracy,Accuracy @ optimal accuracy,step @ optimal accuracy
23,"GRU,bidir=F,noisy_tanh,learn_p=F,noise_alpha=0.9,noise_half_normal=F,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=128,l2_wieght_reg=1.0E-03,target_rep_weight=0.3",0.378,96.66%,95.89%,1000,0.385,96.86%,96.16%,990,0.385,96.86%,96.16%,990
20,"GRU,bidir=F,noisy_tanh,learn_p=F,noise_alpha=0.9,noise_half_normal=T,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=128,l2_wieght_reg=1.0E-03,target_rep_weight=0.3",0.445,96.73%,95.62%,900,0.445,96.73%,95.62%,900,0.445,96.73%,95.62%,900
7,"GRU,bidir=F,noisy_tanh,learn_p=F,noise_alpha=1.15,noise_half_normal=F,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=128,l2_wieght_reg=1.0E-04,target_rep_weight=0.3",0.361,96.32%,95.07%,740,0.363,96.58%,95.62%,770,0.383,96.50%,95.62%,680
44,"GRU,bidir=T,noisy_tanh,learn_p=T,noise_alpha=0.9,noise_half_normal=T,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=128,l2_wieght_reg=1.0E-03,target_rep_weight=0.3",0.389,96.32%,95.62%,720,0.396,96.46%,95.62%,740,0.393,96.29%,95.62%,700
5,"GRU,bidir=F,noisy_tanh,learn_p=T,noise_alpha=0.9,noise_half_normal=T,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=128,l2_wieght_reg=1.0E-03,target_rep_weight=0.3",0.418,96.19%,94.79%,670,0.435,96.42%,95.34%,700,0.435,96.42%,95.34%,700
25,"GRU,bidir=F,noisy_tanh,learn_p=F,noise_alpha=0.9,noise_half_normal=F,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=128,l2_wieght_reg=1.0E-04,target_rep_weight=0.3",0.356,96.21%,94.79%,720,0.378,96.40%,95.62%,940,0.378,96.39%,95.62%,920
98,"LSTM,bidir=F,noisy_tanh,learn_p=F,noise_alpha=1.15,noise_half_normal=F,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=128,l2_wieght_reg=1.0E-04,target_rep_weight=0.3",0.477,96.20%,95.07%,690,0.528,96.39%,95.62%,820,0.521,96.30%,95.62%,760
68,"GRU,bidir=T,noisy_tanh,learn_p=F,noise_alpha=1.15,noise_half_normal=T,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=128,l2_wieght_reg=1.0E-04,target_rep_weight=0.3",0.264,96.37%,95.07%,970,0.264,96.37%,95.07%,970,0.264,96.37%,95.07%,970
99,"LSTM,bidir=F,noisy_tanh,learn_p=F,noise_alpha=1.15,noise_half_normal=F,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=64,l2_wieght_reg=1.0E-04,target_rep_weight=0.3",0.457,96.12%,94.79%,860,0.493,96.37%,95.34%,430,0.493,96.37%,95.34%,430
14,"GRU,bidir=F,noisy_tanh,learn_p=T,noise_alpha=0.9,noise_half_normal=F,keep_infreq_labels=F,learn_rate=1.0E-02,keep_prob=0.7,one_hot,hidden_state_size=64,l2_wieght_reg=1.0E-03,target_rep_weight=0.3",0.487,95.54%,93.70%,650,0.505,96.19%,95.07%,860,0.542,95.98%,95.07%,850
