## Importing

In [1]:
import os
%cd ..
print(os.getcwd())
import sys
sys.path.append('.')

import pandas as pd
import numpy as np
from glob import glob
import statsmodels.api as sm
import re

# For panel visualizations:
import panel as pn

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# so that panel can show plotly figures
pn.extension('plotly')


C:\Users\nguye\Documents\PBS\Research\extended-event-modeling
C:\Users\nguye\Documents\PBS\Research\extended-event-modeling


In [2]:
# Palette indexed by hue position when drawing traces.
# Every entry is a distinct CSS colour: the original list repeated 'darkred' and
# contained both spellings of 'darkgray'/'darkgrey' and
# 'darkslategray'/'darkslategrey', which made different hues indistinguishable.
# The replacements sit at the same positions, so the length and all other
# indices are unchanged.
colors = ["lightseagreen", "mediumpurple", "goldenrod",
          'darkred', 'black',
          'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue',
          'chartreuse', 'chocolate', 'coral', 'cornflowerblue',
          'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan',
          'darkgoldenrod', 'darkgray', 'darkviolet', 'darkgreen',
          'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange',
          'darkorchid', 'darkturquoise', 'darksalmon', 'darkseagreen',
          'darkslateblue', 'darkslategray', 'deeppink']

## Initialize Variables and Widgets

In [3]:
def _config_from_tag(tag):
    """Return the part of ``tag`` that precedes its first seed-like token.

    A seed-like token is the leftmost substring matching ``1[0-9][0-9]0``
    (for example ``1030`` or ``1080``). A tag without such a token is returned
    unchanged instead of collapsing to an empty string, and a tag with several
    tokens is cut at the first one instead of losing only its last character.
    """
    seed_match = re.search("1[0-9][0-9]0", tag)
    return tag[:seed_match.start()] if seed_match else tag


# Load the current results and, when a backup checkout exists next to this
# repository, append its results as well.
df = pd.read_csv('output/run_sem/results_purity_coverage.csv')
old_path = '../extended-event-modeling-bk/extended-event-modeling/output/run_sem/results_purity_coverage.csv'
if os.path.exists(old_path):
    df_old = pd.read_csv(old_path)
    # ignore_index avoids duplicated row labels after stacking the two files.
    df = pd.concat([df, df_old], axis=0, ignore_index=True)
# Rows with any missing value are discarded.
df = df.dropna(axis=0)

df['epoch'] = df['epoch'].astype(int)
# The character at index 2 of the run name is used as the chapter
# (assumes run names look like '1.2.10_kinect' with single-digit fields - TODO confirm).
df['chapter'] = df['run'].apply(lambda x: int(x[2]))
df['entropy_abs'] = df['entropy'] * np.log(df['n_event_models'])
df['config'] = df['tag'].apply(_config_from_tag)
# Cap the boundary count at 400.
df['number_boundaries'] = df['number_boundaries'].clip(upper=400)


# Prefixes of the run directories to look for under output/run_sem/.
years = ['jan', 'feb', 'mar', 'april', 'may', 'june', 'july', 'aug', 'sep', 'oct', 'nov', 'dec']
# Collect the last path component of every match whose (slash-normalised) path contains a digit.
tags = [
    posix_path.split('/')[-1]
    for prefix in years
    for posix_path in (found.replace('\\', '/') for found in glob(f'output/run_sem/{prefix}*'))
    if any(ch.isdigit() for ch in posix_path)
]
available_tags = sorted(tags)
# Alternative source of tags: available_tags = sorted(list(df.tag.unique()))
default_tag = available_tags[0]
multi_tag_select = pn.widgets.MultiSelect(
    name='Select 2 or 3 Tags for Comparison',
    value=[default_tag],
    options=available_tags,
    height=100,
    width=500,
)

# Any column of the results frame may be chosen as a metric.
available_metrics = sorted(df.columns)
default_metric = [
    'mean_pe', 'n_event_models', 'active_event_models', 'entropy', 'pearson_r',
    'bicorr', 'percentile', 'number_boundaries', 'purity', 'coverage',
]
multi_metric_select = pn.widgets.MultiSelect(
    name='Select Metrics to Visualize',
    value=default_metric,
    options=available_metrics,
    height=100,
)

# Run names: all of them, then split by the is_train flag. Every selector starts with all of its options selected.
available_runs = sorted(df['run'].unique())
default_run = available_runs
is_train_rows = df['is_train'] == True
is_valid_rows = df['is_train'] == False
available_train_runs = list(df[is_train_rows]['run'].unique())
default_train_run = available_train_runs
available_valid_runs = list(df[is_valid_rows]['run'].unique())
default_valid_run = available_valid_runs

multi_run_select = pn.widgets.MultiSelect(
    name='Select Runs to Visualize',
    value=default_run,
    options=available_runs,
    height=100,
)
multi_train_run_select = pn.widgets.MultiSelect(
    name='Select train_runs to Visualize',
    value=default_train_run,
    options=available_train_runs,
    height=100,
)
multi_valid_run_select = pn.widgets.MultiSelect(
    name='Select valid_runs to Visualize',
    value=default_valid_run,
    options=available_valid_runs,
    height=100,
)

# Toggles for showing train and/or validation rows.
train_checkbox = pn.widgets.Checkbox(name='Display Train?', value=True)
valid_checkbox = pn.widgets.Checkbox(name='Display Valid?', value=False)

# Column used to colour the traces.
hues = ['tag', 'chapter', 'run', 'config', 'is_train', 'grain']
hue_select = pn.widgets.Select(name='Select Hue', options=hues, value='tag')

# Column used to split the figure into subplot columns (same choices as the hue).
splits = ['tag', 'chapter', 'run', 'config', 'is_train', 'grain']
split_select = pn.widgets.Select(name='Select Split', options=splits, value='is_train')

# Both sliders offer every integer epoch between the smallest and largest epoch in df.
first_epoch = df['epoch'].min()
last_epoch = df['epoch'].max()
min_epoch_slider = pn.widgets.DiscreteSlider(
    name='Select Min Epoch',
    options=list(range(first_epoch, last_epoch + 1)),
    value=first_epoch,
)
max_epoch_slider = pn.widgets.DiscreteSlider(
    name='Select Max Epoch',
    options=list(range(first_epoch, last_epoch + 1)),
    value=last_epoch,
)

In [4]:
# The second-to-last and last '_'-separated fields of each tag are offered as alfa / lmda choices.
alfas = sorted({tag.split('_')[-2] for tag in available_tags})
lmdas = sorted({tag.split('_')[-1] for tag in available_tags})
multi_alfa_select = pn.widgets.MultiSelect(name='Select Alfas', value=[alfas[0]], options=alfas)
multi_lmda_select = pn.widgets.MultiSelect(name='Select Lmdas', value=[lmdas[0]], options=lmdas)

# Configurations come from the 'config' column of the results frame, in order of first appearance.
available_configs = list(df['config'].unique())
default_config = available_configs[0]
multi_config_select = pn.widgets.MultiSelect(
    name='Select Configs',
    value=[default_config],
    options=available_configs,
)


@pn.depends(multi_alfa_select, multi_lmda_select, multi_config_select)
def alfa_lmda_to_tag(multi_alfa_select, multi_lmda_select, multi_config_select):
    """Select in ``multi_tag_select`` every tag matching the chosen alfa/lmda/config values.

    The three parameters receive the selected values of the widgets named in
    ``pn.depends`` (they shadow the widget objects inside this function).

    A tag is selected when its last two ``'_'``-separated fields equal one of the
    chosen (alfa, lmda) pairs and it contains at least one of the chosen configs.
    The alfa/lmda choices are derived from exactly those two fields, so comparing
    fields (rather than searching for ``'{alfa}_{lmda}'`` anywhere in the tag)
    avoids matching a pair that merely appears earlier in the tag.

    Returns nothing; the result is written to ``multi_tag_select.value`` as a
    sorted list in which each tag appears once, even if it matches several configs.
    """
    wanted_pairs = {(alfa, lmda) for alfa in multi_alfa_select for lmda in multi_lmda_select}
    select_tags = [
        t for t in available_tags
        if tuple(t.split('_')[-2:]) in wanted_pairs
        and any(f'{config}' in t for config in multi_config_select)
    ]
    # set() guards against duplicate entries should available_tags ever contain repeats.
    multi_tag_select.value = sorted(set(select_tags))

## Define the dashboard and launch it

In [7]:
import param


class ActionExample(param.Parameterized):
    """
    Holds the dashboard's "update" buttons and the plots they refresh.

    Each ``param.Action`` triggers itself when invoked; the plotting method that
    ``param.depends`` on it is then re-evaluated. All selections are read from
    the module-level widgets at call time.
    """

    def get_df_select(self):
        """
        Return the rows of ``df`` matching the current widget selections.

        A row is kept when its tag is selected, its run is among the selected
        train or valid runs, and its epoch lies between the two epoch sliders
        (both ends inclusive). When exactly one of the train/valid checkboxes is
        ticked only that subset is kept; with both or neither ticked no
        train/valid filtering is applied.
        """
        all_runs = multi_train_run_select.value + multi_valid_run_select.value
        # Log the runs that are actually used for filtering (train + valid selectors).
        print(f'Update data: \n'
              f'tags={multi_tag_select.value} \n'
              f'runs={all_runs}')
        df_select = df[(df['tag'].isin(multi_tag_select.value)) & (df['run'].isin(all_runs))
                       & (df['epoch'] <= max_epoch_slider.value) & (df['epoch'] >= min_epoch_slider.value)
                       ]
        show_train = train_checkbox.value
        show_valid = valid_checkbox.value
        if show_train and not show_valid:
            df_select = df_select[(df_select['is_train'] == True)]
        elif show_valid and not show_train:
            df_select = df_select[(df_select['is_train'] == False)]
        print(f"Length of df_select={len(df_select)}")
        return df_select

    # Button that re-runs matrix_visualize.
    action_matrix = param.Action(lambda x: x.param.trigger('action_matrix'), label='Update Matrix pane')

    @param.depends('action_matrix')
    def matrix_visualize(self):
        """
        Build a grid of scatter plots with lowess trend lines.

        Rows of the grid are the selected metrics (``epoch`` excluded), columns are
        the unique values of the split column, and colours are the unique values
        of the hue column. Returns ``None`` when no tag or no metric is selected,
        otherwise the plotly figure.
        """
        df_select = self.get_df_select()
        tags = sorted(list(df_select['tag'].unique()))
        print(f'Tags for plotting Matrix: {tags}')
        metrics = [x for x in multi_metric_select.value if x != 'epoch']
        # Nothing to draw; make_subplots would also be asked for a grid with zero rows.
        if len(tags) == 0 or len(metrics) == 0:
            return None
        hue_col = f"{hue_select.value}"
        split_col = f"{split_select.value}"
        # split_select.options == hue_select.options, so every grouping column is kept as an id column.
        df_select_long = pd.melt(df_select, id_vars=['epoch'] + list(split_select.options), value_vars=metrics)
        unique_columns = list(df_select_long[split_col].unique())
        unique_hues = list(df_select_long[hue_col].unique())
        fig = make_subplots(rows=len(metrics), cols=len(unique_columns),
                            shared_xaxes=False,
                            vertical_spacing=0.02,
                            horizontal_spacing=0.1,
                            )
        print(unique_columns)
        print(unique_hues)
        # Hues that already own a legend entry; the entry goes on the first marker trace drawn for a hue.
        legend_shown = set()
        for i, column in enumerate(unique_columns):
            df_column = df_select_long[df_select_long[split_col] == column]
            for k, m in enumerate(metrics):
                fig.update_yaxes(title_text=f"{m}", row=k+1, col=i+1)
            for j, hue in enumerate(unique_hues):
                # The hue/column subset does not depend on the metric, so filter it once.
                df_trace = df_column[df_column[hue_col] == hue]
                # Wrap around the palette instead of raising IndexError when there are many hues.
                color = f'{colors[j % len(colors)]}'
                for k, m in enumerate(metrics):
                    df_metric = df_trace[df_trace.variable == f'{m}']
                    # A hue may have no rows in this column; skip it rather than smoothing empty arrays.
                    if df_metric.empty:
                        continue
                    # jitter a bit to avoid lowess returning nan (#unique x < polynomial degree or it param)
                    x = df_metric['epoch'].to_numpy(dtype=float) + np.random.randn(len(df_metric)) * 0.01
                    # lowess returns its rows sorted by x, so sort the points first:
                    # the hover text then lines up with the smoothed curve as well.
                    order = np.argsort(x)
                    x = x[order]
                    y = df_metric['value'].to_numpy(dtype=float)[order]
                    run = df_metric['run'].to_numpy()[order]
                    # lowess returns the smoothed data as (x, fitted y) rows
                    lowess = sm.nonparametric.lowess(y, x, frac=3.0/4.0)
                    lowess_x = lowess[:, 0]
                    lowess_y = lowess[:, 1]
                    # group so that click behavior apply to all.
                    fig.add_trace(go.Scatter(x=lowess_x, y=lowess_y, mode="lines", line_color=color, hovertext=run,
                                             name=f"{hue}", legendgroup=f"{hue}", showlegend=False,
                                             ), row=k+1, col=i+1)
                    fig.add_trace(go.Scatter(x=x, y=y, mode="markers", marker_color=color, hovertext=run,
                                             name=f"{hue}", legendgroup=f"{hue}",
                                             showlegend=(hue not in legend_shown)), row=k+1, col=i+1)
                    legend_shown.add(hue)
        fig.update_layout(showlegend=True, height=500*len(metrics), width=500*len(unique_columns) + 400)
        fig.update_xaxes(title_text="Amount of Training (videos)")

        return fig

    # Button that re-runs plot_pe.
    action_pe = param.Action(lambda x: x.param.trigger('action_pe'), label='Update PE pane')

    @param.depends('action_pe')
    def plot_pe(self):
        """
        Scatter ``mean_pe`` against ``epoch`` with an OLS trend line, one facet per tag.

        Returns ``None`` when the current selection contains no tag, otherwise the
        plotly figure.
        """
        df_select = self.get_df_select()
        tags = sorted(list(df_select['tag'].unique()))
        if len(tags) == 0:
            return None

        fig = px.scatter(data_frame=df_select, x="epoch", y="mean_pe", facet_col="tag", trendline="ols")
        fig.update_layout(width=800 * len(tags))

        return fig


In [8]:
action_matrix = ActionExample()

# Dashboard sections, top to bottom.
dashboard_sections = [
    # Title.
    pn.Row('Compare Scatter Matrices Across Configurations'),
    # Alfa / lmda / config pickers (these drive the tag selection via alfa_lmda_to_tag).
    pn.Row(multi_alfa_select, multi_lmda_select, multi_config_select),
    # Metric, tag and run pickers; alfa_lmda_to_tag is included so panel watches its dependencies.
    pn.Row(multi_metric_select, alfa_lmda_to_tag, multi_tag_select, multi_train_run_select, multi_valid_run_select),
    # Epoch range.
    pn.Row(min_epoch_slider, max_epoch_slider),
    # Train/valid toggles plus hue and split choices.
    pn.Row(train_checkbox, valid_checkbox, hue_select, split_select),
    # Update button followed by the plot it refreshes.
    pn.Column(action_matrix.param.action_matrix, action_matrix.matrix_visualize),
    pn.Column(action_matrix.param.action_pe, action_matrix.plot_pe),
]
dashboard = pn.Column(*dashboard_sections)

# Mark the dashboard as servable; as the cell's last expression it is also displayed inline.
dashboard.servable()

Update data: 
tags=['sep_09_n15_1030_1E-03_1E-01_1E+07'] 
runs=['1.1.10_kinect', '1.1.1_kinect', '1.1.2_kinect', '1.1.3_kinect', '1.1.4_kinect', '1.1.5_kinect', '1.1.6_kinect', '1.1.7_kinect', '1.1.8_kinect', '1.1.9_kinect', '1.2.10_kinect', '1.2.1_kinect', '1.2.3_kinect', '1.2.4_kinect', '1.2.6_kinect', '1.2.7_kinect', '1.2.8_kinect', '1.2.9_kinect', '1.3.10_kinect', '1.3.1_kinect', '1.3.3_kinect', '1.3.4_kinect', '1.3.5_kinect', '1.3.6_kinect', '1.3.7_kinect', '1.3.8_kinect', '1.3.9_kinect', '2.2.10_kinect', '2.2.1_kinect', '2.2.2_kinect', '2.2.3_kinect', '2.2.4_kinect', '2.2.5_kinect', '2.2.6_kinect', '2.2.7_kinect', '2.2.8_kinect', '2.2.9_kinect', '2.3.10_kinect', '2.3.1_kinect', '2.3.2_kinect', '2.3.3_kinect', '2.3.4_kinect', '2.3.5_kinect', '2.3.6_kinect', '2.3.7_kinect', '2.3.8_kinect', '2.3.9_kinect', '2.4.10_kinect', '2.4.1_kinect', '2.4.2_kinect', '2.4.3_kinect', '2.4.4_kinect', '2.4.5_kinect', '2.4.6_kinect', '2.4.7_kinect', '2.4.8_kinect', '2.4.9_kinect', '3.1.10_kinect', '

## Cells to test functions while assigning widget values

In [75]:
# df = pd.read_csv('output/run_sem/results_purity_coverage.csv')
# multi_tag_select.value = ['oct_13_refactor_seed1080_1E-03_1E-01_1E+07']
# train_checkbox.value = False
# valid_checkbox.value = True
# df_select = action_matrix.get_df_select()
# df_select = df_select.dropna(axis=0)
# metrics = default_metric
# df_select_long = pd.melt(df_select, id_vars=['epoch', 'tag', 'run'], value_vars=metrics)
# df_select_long = df_select_long.dropna(axis=0)
# fig = make_subplots(rows=len(metrics), cols=1, subplot_titles=tuple(metrics),
#                     shared_xaxes=False,
#                     # x_title='Amount of Training (videos)'
#                     )
# import statsmodels.api as sm
# for i, m in enumerate(metrics):
#     if m == 'epoch':
#         continue
#     x = df_select_long[df_select_long.variable == f'{m}'].epoch
#     y = df_select_long[df_select_long.variable == f'{m}'].value
#     run = df_select_long[df_select_long.variable == f'{m}'].run
#     lowess = sm.nonparametric.lowess(y, x, frac=.3)
#     print(f'{m}, {lowess}')
#     # unpack the lowess smoothed points to their values
#     lowess_x = list(zip(*lowess))[0]
#     lowess_y = list(zip(*lowess))[1]
#     fig.add_trace(go.Scatter(x=lowess_x, y=lowess_y, mode="lines", line_color=f'{colors[i]}', hovertext=run, name=""), row=i+1, col=1)

Update data: 
tags=['oct_13_refactor_seed1080_1E-03_1E-01_1E+07'] 
runs=['1.1.10_kinect', '1.1.1_kinect', '1.1.2_kinect', '1.1.3_kinect', '1.1.4_kinect', '1.1.5_kinect', '1.1.6_kinect', '1.1.7_kinect', '1.1.8_kinect', '1.1.9_kinect', '1.2.10_kinect', '1.2.1_kinect', '1.2.3_kinect', '1.2.4_kinect', '1.2.6_kinect', '1.2.7_kinect', '1.2.8_kinect', '1.2.9_kinect', '1.3.10_kinect', '1.3.1_kinect', '1.3.3_kinect', '1.3.4_kinect', '1.3.5_kinect', '1.3.6_kinect', '1.3.7_kinect', '1.3.8_kinect', '1.3.9_kinect', '2.2.10_kinect', '2.2.1_kinect', '2.2.2_kinect', '2.2.3_kinect', '2.2.4_kinect', '2.2.5_kinect', '2.2.6_kinect', '2.2.7_kinect', '2.2.8_kinect', '2.2.9_kinect', '2.3.10_kinect', '2.3.1_kinect', '2.3.2_kinect', '2.3.3_kinect', '2.3.4_kinect', '2.3.5_kinect', '2.3.6_kinect', '2.3.7_kinect', '2.3.8_kinect', '2.3.9_kinect', '2.4.10_kinect', '2.4.1_kinect', '2.4.2_kinect', '2.4.3_kinect', '2.4.4_kinect', '2.4.5_kinect', '2.4.6_kinect', '2.4.7_kinect', '2.4.8_kinect', '2.4.9_kinect', '3.1.10_k

In [84]:
# testing Lowess
# m = 'mean_pe'
# x = df_select_long[df_select_long.variable == f'{m}'].epoch
# x = x + np.random.randn(len(x)) * 0.01
# y = df_select_long[df_select_long.variable == f'{m}'].value
# run = df_select_long[df_select_long.variable == f'{m}'].run
# lowess = sm.nonparametric.lowess(y, x, frac=.3)
# x, y, lowess
