In [None]:
import tensorboard as tb
from tbparse import SummaryReader
import pandas as pd
import os
import time

In [None]:
def get_logs(run_name, base='../tensorboard_log/cs', pivot=True, extra_columns=None):
    """
    opens the tensorboard log of a single run with tbparse.
    the log location is `base` joined with `run_name`; `pivot` and
    `extra_columns` are handed to SummaryReader unchanged.
    """
    log_dir = os.path.join(base, run_name)
    reader = SummaryReader(log_dir, pivot=pivot, extra_columns=extra_columns)
    return reader

def parse_scalars(reader):
    """
    parses the scalar dataframe of a SummaryReader-like object (anything with a
    `.scalars` dataframe attribute) and returns a cleaned-up copy.

    - if a 'wall_time' column exists, list-like entries are reduced to their
      first element; entries that are already plain values are kept as they are
    - a leading 'my-stats/', 'train/' or 'time/' is stripped from column names

    the dataframe obtained from `reader.scalars` itself is never modified.
    """
    prefixes = ('my-stats/', 'train/', 'time/')

    def strip_prefixes(name):
        # Only a leading match is removed. A plain str.replace would also eat
        # the text in the middle of a name, e.g. 'runtime/x' -> 'runx'.
        for prefix in prefixes:
            if name.startswith(prefix):
                name = name[len(prefix):]
        return name

    # work on a copy so the caller's dataframe keeps its original wall_time column
    scalars = reader.scalars.copy()
    if 'wall_time' in scalars.columns:
        # convert wall time list into single number column, just use first value;
        # values that are not list-like are passed through instead of failing on x[0]
        scalars['wall_time'] = [
            x[0] if pd.api.types.is_list_like(x) else x
            for x in scalars['wall_time'].tolist()
        ]
    return scalars.rename(columns=strip_prefixes)

def get_filenames(model, run_cfg, base='../tensorboard_log/cs'):
    """
    lists the entries below `base` that belong to `model` / `run_cfg`, sorted
    by datestr (3rd element in _.split, compared as an integer).

    left out are: plain files, names rejected by parts_match_filter, and names
    whose 3rd element is not an integer (those cannot be ordered by date).
    """
    candidates = []
    for filename in os.listdir(base):
        if os.path.isfile(os.path.join(base, filename)):
            continue
        fn_parts = filename.split('_')
        if not parts_match_filter(fn_parts, model, run_cfg):
            continue
        try:
            timestamp = int(fn_parts[2])
        except ValueError:
            # e.g. 'A2C_V208_backup': matches model and config but carries no
            # date, so skip it instead of aborting the whole listing
            continue
        candidates.append((timestamp, filename))
    # tuples sort by timestamp first, then by name for equal timestamps
    return [name for _, name in sorted(candidates)]

def parts_match_filter(parts, model, run_cfg):
    """
    tells whether the split name `parts` belongs to the requested run:
    at least three parts, first part equal to `model`, second equal to `run_cfg`.
    """
    has_enough_parts = len(parts) >= 3
    return has_enough_parts and parts[0] == model and parts[1] == run_cfg

def fix_counters(dataframes):
    """
    Adds max. step / episode from previous dfs to the next one to have continuous values.

    The 'step' and 'episode' columns of every dataframe are shifted by the
    highest value reached so far and cast to int. The dataframes are modified
    in place and the same list is returned.
    Empty dataframes are allowed; they leave the running offsets untouched.
    """
    start_step = 0
    start_ep = 0
    for d in dataframes:
        d['step'] = (d['step'] + start_step).astype('int')
        d['episode'] = (d['episode'] + start_ep).astype('int')
        if d.empty:
            # no rows to take the next offsets from
            continue
        # column max instead of the last row: also correct when the rows are
        # not sorted, and it keeps the integer dtype of the column
        start_step = d['step'].max()
        start_ep = d['episode'].max()
    return dataframes

def load_dataframes(filenames):
    """
    reads every tensorboard log named in `filenames` (requesting the extra
    'wall_time' column) and returns the parsed scalar dataframes as a list,
    in the same order as the names.
    """
    return [
        parse_scalars(get_logs(name, extra_columns={'wall_time'}))
        for name in filenames
    ]

def merge_data(dataframes, add_date_column=False):
    """
    stacks the dataframes into a single one with a fresh 0..n-1 index and an
    integer 'wall_time' column.
    with add_date_column=True a 'date' column is added, built from 'wall_time'
    read as a number of seconds.
    """
    merged = pd.concat(dataframes, ignore_index=True)
    merged['wall_time'] = merged['wall_time'].astype('int')
    if not add_date_column:
        return merged
    as_dates = pd.to_datetime(merged['wall_time'], unit='s')
    merged['date'] = as_dates.astype('datetime64[s]')
    return merged

In [None]:
def combine_tensorlogs(model, cfg, base_path='../tensorboard_log/cs', target_path='../logs_parsed', add_date=False):
    """
    combines all tensorboard logs of one model / config into a single csv.

    the logs are looked up below `base_path`, loaded, given continuous
    step / episode counters, merged, and written to
    `<target_path>/<model>_<cfg>.csv` (the directory is created if missing).
    `add_date` is passed on to merge_data as add_date_column.

    returns the merged dataframe, or None if no matching logs were found.
    """
    print(f"Combine Logs for {model}_{cfg}")
    start = time.time()
    filenames = get_filenames(model, cfg, base=base_path)

    if not filenames:
        print("No files found - skipping")
        return None

    # load_dataframes joins each name onto its own default base directory.
    # os.path.join drops that default when the name is an absolute path, so
    # absolute paths make sure the logs are read from base_path as well.
    log_dirs = [os.path.abspath(os.path.join(base_path, fn)) for fn in filenames]
    x = load_dataframes(log_dirs)
    x = fix_counters(x)
    x = merge_data(x, add_date_column=add_date)

    csv_path = f"{target_path}/{model}_{cfg}.csv"
    # to_csv does not create missing directories
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    x.to_csv(csv_path)
    print(f"Combine Logs for {model}_{cfg} done in {(time.time() - start):.1f}s")

    return x

In [None]:
# run configurations to process: 'V201' up to and including 'V211'
cfgs = [f'V{number}' for number in range(201, 212)]

In [None]:
# available model names / run configurations; the first entry of each list is selected
models = ['A2C']
run_configs = ['V208']
model, run_cfg = models[0], run_configs[0]

In [None]:
# combine the logs of every run configuration in cfgs for the "A2C" model;
# df is overwritten on each pass and ends up holding the return value of the last call
for cfg in cfgs:
    df = combine_tensorlogs("A2C", cfg, add_date=True)

In [None]:
# pick the run configuration to inspect
run_cfg = 'V210'

# no other cell assigns a global `filenames` (the name only exists as a local
# inside combine_tensorlogs), so look the log directories up here before
# displaying them
filenames = get_filenames(model, run_cfg)
filenames

In [None]:
# NOTE(review): no visible cell assigns a global `x` (it only exists as a local
# inside combine_tensorlogs), so this display presumably raises NameError on a
# fresh kernel - confirm what was meant to be shown here
x

In [None]:

# all_data['date'] = pd.to_datetime(all_data['wall_time'], unit='s').astype('datetime64[s]')

In [None]:
# NOTE(review): `all_data` is not assigned in any visible cell, so this export
# to 'test.csv' presumably fails with NameError unless it is defined elsewhere - confirm
all_data.to_csv('test.csv')