In [48]:
import numpy as np
import pandas as pd
import pickle

# Folder that holds the pickled t-SNE results; `load_df` reads
# `<data_folder>/tsne_<dataset_name>.pickle` from it.
data_folder = 'output_tsne'

In [49]:
import plotly.plotly as py
import plotly.graph_objs as go

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Enable plotly's offline notebook mode so the `iplot` calls in the plotting
# helpers render their figures inline in the notebook output.
init_notebook_mode(connected=True)

In [50]:
import ipywidgets as widgets
from ipywidgets import interact, interactive_output

In [51]:
def load_df(dataset_name):
    """Load the pickled t-SNE results of one dataset into a DataFrame.

    Args:
        dataset_name: Identifier used to build the input path
            ``<data_folder>/tsne_<dataset_name>.pickle``. An empty string
            or ``None`` means "no dataset selected".

    Returns:
        A ``pandas.DataFrame`` built from the records stored under the
        ``'results'`` key of the pickle, with the ``'embedding'`` field
        excluded, or ``None`` when no dataset is selected.
    """
    # Covers both the "" placeholder of the dataset dropdown and None.
    if not dataset_name:
        return None

    in_name = '{}/tsne_{}.pickle'.format(data_folder, dataset_name)
    # NOTE(security): pickle.load can execute arbitrary code; only point
    # `data_folder` at files produced by a trusted pipeline.
    with open(in_name, 'rb') as in_file:  # close the handle deterministically
        pkl_data = pickle.load(in_file)

    embeddeds = pkl_data['results']
    return pd.DataFrame.from_records(embeddeds, exclude=["embedding"])

# Global dataframe shared by the plotting callbacks, which read (and, on a
# dataset change, replace) it. It is loaded eagerly here because the slider
# widgets take their options from its 'learning_rate' / 'perplexity' columns.
g_df = load_df(dataset_name='MNIST-SMALL')

In [52]:
def plot_surface(pivot_data, z_title='', chart_title=''):
    """Draw a pivoted table as an interactive 3D surface.

    Args:
        pivot_data: DataFrame whose column labels are used as the x
            coordinates (axis labelled 'Perplexity'), whose index labels
            are used as the y coordinates (axis labelled 'Learning rate')
            and whose cell values give the surface height.
        z_title: Title of the z axis.
        chart_title: Title shown above the chart.

    Returns:
        None. The figure is displayed through ``iplot``.
    """
    surface = go.Surface(
        x=pivot_data.columns,
        y=pivot_data.index,
        # `.values` replaces DataFrame.as_matrix(), which was deprecated and
        # later removed from pandas; both yield the same 2-D ndarray.
        z=pivot_data.values,
    )
    layout = go.Layout(
        title=chart_title,
        autosize=True,
        width=800,
        height=800,
        margin=dict(l=65, r=50, b=65, t=90),
        scene=dict(
            xaxis=dict(title='Perplexity'),
            yaxis=dict(title='Learning rate'),
            zaxis=dict(title=z_title),
        ),
    )
    fig = go.Figure(data=[surface], layout=layout)
    iplot(fig)

In [53]:
def plot_lines_by_key(key_name, key_val, use_log):
    """Plot every metric as a line for one fixed hyper-parameter value.

    The rows of the global ``g_df`` whose ``key_name`` column equals
    ``key_val`` are selected, and each metric column is drawn against the
    other hyper-parameter.

    Args:
        key_name: Column to filter on. ``'perplexity'`` puts the learning
            rate on the x axis; any other value is treated as
            ``'learning_rate'`` and puts the perplexity on the x axis.
        key_val: Value of ``key_name`` to keep.
        use_log: When true, the x values are passed through ``np.log``.

    Returns:
        None. The figure is displayed through ``iplot``; nothing is drawn
        when ``g_df`` is ``None``.
    """
    if g_df is None:
        return

    # The filter is the same in both cases; only the x-axis column differs.
    x_column = 'learning_rate' if key_name == 'perplexity' else 'perplexity'
    df_by_key = g_df[g_df[key_name] == key_val]
    x_data = np.log(df_by_key[x_column]) if use_log else df_by_key[x_column]

    line_names = [
        # (line_name, display_name, show_by_default)
        ('q_link', '-log(links) in low dim.', True),
        ('p_link', '-log(links) in high dim.', False),
        ('loss', 'KL loss', True),
        ('auc_rnx', 'logRNX', True),
        ('cca_stress', 'CCA Stress', False),
        ('mds_isotonic', 'MDS Stress', False),
        ('pearsonr', 'Corr. Coef.', True),
        ('sammon_nlm', 'Sammon NLM', False)
    ]

    plot_data = [
        go.Scatter(
            x=x_data,
            y=df_by_key[line_name],
            name=display_name,
            mode='lines+markers',
            line={'shape': 'spline'},
            # plotly only accepts True, False or 'legendonly' here; the
            # former '' is not a valid value for this attribute.
            visible=True if show_by_default else 'legendonly',
        )
        for line_name, display_name, show_by_default in line_names
    ]

    layout = dict(
        title='Metrics and log likelihood values for constrained links',
        xaxis=dict(title='{} {}'.format(
            'Log of ' if use_log else '',
            'Learning rate' if key_name == 'perplexity' else 'Perplexity')),
        yaxis=dict(title='Metrics value / negative LL'),
    )

    fig = go.Figure(data=plot_data, layout=layout)
    iplot(fig)

In [54]:
# UI controls

# Dataset selector. Options are (label, value) pairs instead of a dict: the
# order shown in the dropdown is then guaranteed, and the mapping form of
# `options` is deprecated in ipywidgets 7. The "" value stands for "nothing
# selected", which `load_df` maps to None.
datasetX = widgets.Dropdown(
    options=[
        ("Select dataset", ""),
        ("MNIST mini", "MNIST-SMALL"),
        ("COIL-20", "COIL20"),
        ("Country Indicators 1999", "COUNTRY1999"),
        ("Country Indicators 2013", "COUNTRY2013"),
        ("Country Indicators 2014", "COUNTRY2014"),
        ("Country Indicators 2015", "COUNTRY2015"),
        ("Cars and Trucks 2004", "CARS04"),
        ("Breast Cancer Wisconsin (Diagnostic)", "BREAST-CANCER95"),
        ("Pima Indians Diabetes", "DIABETES"),
        ("Multidimensional Poverty Measures", "MPI"),
    ],
    value='',
    description='Dataset:',
)

# Selector for the metric column drawn as the 3D surface. Options are
# (label, value) pairs instead of a dict so their order is guaranteed and
# the deprecated mapping form of `options` is avoided.
pivotKeyX = widgets.Dropdown(
    options=[
        ('NLL in LD', 'q_link'),
        ('NLL in HD', 'p_link'),
        ('KL-loss', 'loss'),
        ('AUC R_NX', 'auc_rnx'),
        ('CorrCoef', 'pearsonr'),
        ('MDS Stress', 'mds_isotonic'),
        ('CCA Stress', 'cca_stress'),
        ('Sammon NLM', 'sammon_nlm'),
    ],
    value='q_link',
    description='Surface: ',
)

# Learning-rate slider; its stops are the distinct learning rates found in
# the dataframe loaded at start-up.
lrX = widgets.SelectionSlider(
    options=g_df['learning_rate'].unique(),
    value=100.0,
    # Raw string: '\l' is not a valid escape sequence in a normal literal.
    # The runtime text is unchanged.
    description=r'LR ($\lambda$): '
)

# Perplexity slider; its stops are the distinct perplexities found in the
# dataframe loaded at start-up.
perpX = widgets.SelectionSlider(
    description="Perplexity: ",
    options=g_df["perplexity"].unique(),
    value=50.0,
)

# Toggle for plotting the x axis of the line charts on a log scale
# (unchecked by default).
useLogX = widgets.Checkbox(value=False, description="Log scale in xAxis")

In [55]:
# interactive plot

@interact(dataset_name=datasetX)
def select_dataset(dataset_name):
    """Reload the global dataframe and show the surface plot for it.

    Args:
        dataset_name: Value of the dataset dropdown; ``""`` means that no
            dataset is selected, in which case nothing is plotted.
    """
    global g_df
    g_df = load_df(dataset_name)

    @interact(key_to_pivot=pivotKeyX)
    def plot_surface_by_name(key_to_pivot='q_link'):
        """Pivot the chosen metric over (learning rate, perplexity) and plot it."""
        if g_df is None:
            return

        pivot_df = g_df[['learning_rate', 'perplexity', key_to_pivot]].pivot(
            index='learning_rate', columns='perplexity', values=key_to_pivot)

        # Pass the metric name as z_title too, so the z axis is labelled
        # instead of being left with an empty title.
        plot_surface(pivot_data=pivot_df,
                     z_title=key_to_pivot,
                     chart_title=key_to_pivot)

A Jupyter Widget

In [56]:
def dashboard(dataset_name, lr, perp, use_log):
    """Reload the selected dataset and draw both line charts.

    The first chart fixes the learning rate at ``lr``, the second fixes the
    perplexity at ``perp``; ``use_log`` is forwarded to both.
    """
    global g_df
    g_df = load_df(dataset_name)

    fixed_params = (('learning_rate', lr), ('perplexity', perp))
    for fixed_name, fixed_value in fixed_params:
        plot_lines_by_key(key_name=fixed_name, key_val=fixed_value, use_log=use_log)
    
# Control panel: dataset selector and log toggle on the first row, the two
# hyper-parameter sliders on the second.
ui = widgets.VBox([
    widgets.HBox([datasetX, useLogX]),
    widgets.HBox([lrX, perpX]),
])

# Re-run `dashboard` whenever one of the bound widgets changes value.
out = interactive_output(
    dashboard,
    dict(dataset_name=datasetX, lr=lrX, perp=perpX, use_log=useLogX),
)

display(ui, out)

A Jupyter Widget

A Jupyter Widget