In [3]:
# %load /home/jonathan/.ipython/profile_default/startup/01-setup.py
# start up settings for jupyter notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os, sys

# use plt.style.available() to check out available styles
plt.style.use('seaborn-whitegrid')
plt.rcParams['font.size'] = 15.0
plt.rcParams['axes.labelsize'] = 15.0
plt.rcParams['xtick.labelsize'] = 15.0
plt.rcParams['ytick.labelsize'] = 15.0
plt.rcParams['legend.fontsize'] = 15.0

%matplotlib inline

# set the max column width
pd.options.display.max_colwidth = 1000

# to avoid have warnings from chained assignments
pd.options.mode.chained_assignment = None


In [4]:
import podspy, json

In [None]:
# file names looked up inside a replay directory
CONFIGS_JSON_FILENAME = 'configs.json'
TRACE_STATS_FILENAME = 'trace-stats.csv'
TRACE_STATS_ENRICHED_FILENAME = 'trace-stats-enriched.csv'

# names of the columns added to the trace stats table
ALIGNMENT_FILENAME = 'alignment_filename'
REPLAY_ID = 'replay_id'

# config keys that are dropped before the configs are copied into the table
MODELPATH_KEY = 'modelPath'
LOGPATH_KEY = 'logPath'
RESULTDIR_KEY = 'resultDir'
OUTFILE_KEY = 'outFile'
TO_IGNORE_KEY_LIST = [OUTFILE_KEY, RESULTDIR_KEY, LOGPATH_KEY, MODELPATH_KEY]

def get_caseids(alignment_fpath, caseid_line_index=5):
    """Return the case ids stored on one line of an alignment file.

    The selected line is split on commas and each piece is stripped of
    surrounding whitespace.

    Parameters
    ----------
    alignment_fpath : str
        Path of the alignment file to read.
    caseid_line_index : int, optional
        Zero-based, non-negative index of the line holding the comma-separated
        case ids. Defaults to 5 (the sixth line).

    Returns
    -------
    list of str
        The stripped case ids, in file order.

    Raises
    ------
    IndexError
        If the file has no line at ``caseid_line_index``.
    """
    with open(alignment_fpath, 'r') as f:
        # Stream the file and stop at the wanted line instead of loading
        # every line into memory.
        for line_index, line in enumerate(f):
            if line_index == caseid_line_index:
                return [caseid.strip() for caseid in line.split(',')]

    raise IndexError(
        'no line with index {} in alignment file {!r}'.format(
            caseid_line_index, alignment_fpath
        )
    )
        

def _read_trace_stats(replay_id, replay_dirpath, trace_stats_filename, alignment_dirname):
    """Load a replay's trace stats and annotate them with configs and alignment files.

    Shared implementation behind ``mono_read_trace_stats`` and
    ``reco_read_trace_stats``, which differ only in the stats file name and
    the alignment sub-directory name.

    Parameters
    ----------
    replay_id : object
        Value written to the ``REPLAY_ID`` column of every row.
    replay_dirpath : str
        Directory holding the stats csv, ``CONFIGS_JSON_FILENAME`` and the
        alignment sub-directory.
    trace_stats_filename : str
        Name of the trace stats csv inside ``replay_dirpath``.
    alignment_dirname : str
        Name of the sub-directory whose files are passed to ``get_caseids``.

    Returns
    -------
    pandas.DataFrame
        The trace stats with a ``REPLAY_ID`` column, one column per remaining
        config entry and an ``ALIGNMENT_FILENAME`` column.
    """
    caseid_column = 'SP label'

    trace_stats_fpath = os.path.join(replay_dirpath, trace_stats_filename)
    configs_fpath = os.path.join(replay_dirpath, CONFIGS_JSON_FILENAME)
    alignment_dirpath = os.path.join(replay_dirpath, alignment_dirname)

    with open(configs_fpath, 'r') as f:
        configs_dict = json.load(f)

    # Drop the ignorable entries; a config that lacks one of them is not an
    # error, so pop with a default instead of del.
    for key in TO_IGNORE_KEY_LIST:
        configs_dict.pop(key, None)

    trace_stats_df = pd.read_csv(trace_stats_fpath)
    trace_stats_df[REPLAY_ID] = replay_id

    # Every remaining config entry becomes a column holding that value.
    for key, item in configs_dict.items():
        trace_stats_df[key] = item

    # Collect (alignment id, case id) pairs from every file in the alignment
    # directory; extend in place rather than rebuilding the list each pass.
    alignment_caseid_list = []
    for fname in os.listdir(alignment_dirpath):
        alignment_fpath = os.path.join(alignment_dirpath, fname)
        alignment_id = fname.replace('.csv', '')
        alignment_caseid_list.extend(
            (alignment_id, caseid) for caseid in get_caseids(alignment_fpath)
        )

    alignment_caseid_df = pd.DataFrame(
        alignment_caseid_list, columns=[ALIGNMENT_FILENAME, caseid_column]
    )

    # merge defaults to an inner join, so rows whose case id appears in no
    # alignment file are dropped. NOTE(review): with both suffixes empty,
    # pandas should refuse overlapping non-key columns instead of renaming
    # them - confirm this is the intent.
    return trace_stats_df.merge(alignment_caseid_df, on=caseid_column, suffixes=('', ''))


def mono_read_trace_stats(replay_id, replay_dirpath):
    """Load trace stats from a replay directory that stores them in
    ``TRACE_STATS_ENRICHED_FILENAME`` with alignments under ``alignment/``.

    Returns the annotated DataFrame built by ``_read_trace_stats``.
    """
    return _read_trace_stats(
        replay_id, replay_dirpath, TRACE_STATS_ENRICHED_FILENAME, 'alignment'
    )


def reco_read_trace_stats(replay_id, replay_dirpath):
    """Load trace stats from a replay directory that stores them in
    ``TRACE_STATS_FILENAME`` with alignments under ``alignments/``.

    Returns the annotated DataFrame built by ``_read_trace_stats``.
    """
    # NOTE(review): the directory is 'alignments' here but 'alignment' in
    # mono_read_trace_stats - confirm both layouts really exist.
    return _read_trace_stats(
        replay_id, replay_dirpath, TRACE_STATS_FILENAME, 'alignments'
    )