In [1]:
import numpy as np
import pandas as pd
import pickle

# Directory containing the pickled result files (`tsne_<dataset>.pkl`) that this notebook reads.
data_folder = 'output_tsne'

In [2]:
import plotly.plotly as py
import plotly.graph_objs as go

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Switch plotly to offline notebook mode before any `iplot` call below.
# NOTE(review): `connected=True` presumably loads plotly.js from the CDN instead of
# embedding it in the notebook - confirm against the plotly documentation.
init_notebook_mode(connected=True)

In [3]:
import ipywidgets as widgets
from ipywidgets import interact, interactive_output

In [4]:
def load_df(dataset_name):
    """Load the pickled results of one dataset into a DataFrame.

    Reads ``<data_folder>/tsne_<dataset_name>.pkl``, takes the list of records
    stored under its ``'results'`` key and builds one row per record, leaving
    out the ``'embedding'`` field.

    Args:
        dataset_name: Identifier used in the pickle file name. An empty string
            (or ``None``) means that no dataset is selected.

    Returns:
        A ``pandas.DataFrame`` built from the records, or ``None`` when no
        dataset is selected.

    Raises:
        OSError: If the pickle file cannot be opened.
        KeyError: If the pickle has no ``'results'`` entry.
    """
    if not dataset_name:
        return None

    in_name = '{}/tsne_{}.pkl'.format(data_folder, dataset_name)
    # NOTE: unpickling can execute arbitrary code; only open trusted result files.
    # The context manager closes the file even if unpickling fails.
    with open(in_name, 'rb') as in_file:
        pkl_data = pickle.load(in_file)

    return pd.DataFrame.from_records(pkl_data['results'], exclude=["embedding"])

# Module-level DataFrame shared by every plotting callback in this notebook;
# the interactive callbacks overwrite it each time they run (`global g_df`).
# 'MNIST-SMALL' is loaded up front so that the widget definitions which read
# g_df (slider options, column dropdowns) have data to work with.
g_df = load_df('MNIST-SMALL')

In [18]:
def _plot_surface(pivot_data, z_title='', chart_title=''):
    """Display a pivoted table as an interactive 3-D surface.

    Args:
        pivot_data: DataFrame whose column labels are used as x coordinates
            (axis titled "Perplexity"), whose index labels are used as y
            coordinates (axis titled "Learning rate") and whose cells give
            the height of the surface.
        z_title: Title of the z axis.
        chart_title: Title of the whole chart.

    Returns:
        None. The figure is shown with ``iplot``.
    """
    surface = go.Surface(
        x=pivot_data.columns,
        y=pivot_data.index,
        # `.values` instead of `DataFrame.as_matrix()`, which was removed in
        # pandas 1.0; `.values` works on old and new pandas versions alike.
        z=pivot_data.values,
    )
    layout = go.Layout(
        title=chart_title,
        autosize=True,
        width=600,
        height=600,
        margin=dict(l=65, r=50, b=65, t=90),
        scene=dict(
            xaxis=dict(title='Perplexity'),
            yaxis=dict(title='Learning rate'),
            zaxis=dict(title=z_title),
        ),
    )
    fig = go.Figure(data=[surface], layout=layout)
    iplot(fig)

In [23]:
def _plot_lines_by_key(key_name, key_val, use_log_x, use_log_y, show_inverse, show_neg):
    """Plot all metric columns as lines for one fixed hyper-parameter value.

    Rows of the shared ``g_df`` whose ``key_name`` column equals ``key_val``
    are selected; the other hyper-parameter is used as the x axis.

    Args:
        key_name: ``'perplexity'`` or ``'learning_rate'`` - the column held fixed.
        key_val: Value of ``key_name`` to select.
        use_log_x: Plot the natural log of the x values.
        use_log_y: Plot the natural log of the y values.
        show_inverse: Replace the modifiable lines (tagged ``[*]``) by ``1 / y``
            (0 where ``y`` is 0). Applied after the optional log.
        show_neg: Negate the modifiable lines (tagged ``[*]``). Applied last.

    Returns:
        None. Nothing is drawn when no dataset is loaded (``g_df`` is None).
    """
    if g_df is None:
        return

    # Whichever hyper-parameter is not held fixed goes on the x axis.
    x_name = 'learning_rate' if key_name == 'perplexity' else 'perplexity'
    df_by_key = g_df[g_df[key_name] == key_val]
    x_data = np.log(df_by_key[x_name]) if use_log_x else df_by_key[x_name]

    def _inverse(y):
        # Convert to a float array first so that the `out` buffer can always
        # hold the reciprocal, whatever the dtype of the input column.
        values = np.asarray(y, dtype=float)
        return np.divide(1.0, values, out=np.zeros_like(values), where=(values != 0))

    line_names = [
        # (line_name, display_name, show_by_default, can_modify)
        ('q_link', '[] -log(links) in low dim.', True, False),
        ('p_link', '[] -log(links) in high dim.', False, False),
        ('loss', '[] KL loss', True, False),
        ('auc_rnx', '[*] logRNX', True, True),
        ('cca_stress', '[*] CCA Stress', False, True),
        ('mds_isotonic', '[*] MDS Stress', False, True),
        ('pearsonr', '[*] Corr. Coef.', True, True),
        ('sammon_nlm', '[*] Sammon NLM', True, True)
    ]

    plot_data = []
    for line_name, display_name, show_by_default, can_modify in line_names:
        y_data = df_by_key[line_name]
        if use_log_y:
            y_data = np.log(y_data)
        if can_modify:
            if show_inverse:
                y_data = _inverse(y_data)
            if show_neg:
                y_data = -y_data

        plot_data.append(go.Scatter(
            x=x_data,
            y=y_data,
            name=display_name,
            mode='lines+markers',
            line={'shape': 'spline'},
            # `visible` only accepts True, False or 'legendonly'; the empty
            # string used previously is not a valid value.
            visible=True if show_by_default else 'legendonly'
        ))

    layout = dict(
        title='Metric values / NLL [{}  = {}]'.format(key_name, key_val),
        xaxis=dict(title='{} {}'.format(
            'Log of ' if use_log_x else '',
            'Learning rate' if key_name == 'perplexity' else 'Perplexity')),
        yaxis=dict(title='{} {}'.format(
            'Log of ' if use_log_y else '', 'Metrics value / negative LL')),
        autosize=False
    )

    fig = go.Figure(data=plot_data, layout=layout)
    iplot(fig)

In [24]:
# UI controls
# These widgets are created once and reused by all the interactive views below.

# Dataset selector: maps the label shown in the dropdown to the dataset
# identifier handed to the callbacks (and used in the pickle file name).
# The empty value means "nothing selected"; load_df returns None for it.
datasetX = widgets.Dropdown(
    options={
        "Select dataset": "",
        "MNIST mini": "MNIST-SMALL",
        "COIL-20": "COIL20",
        "MNIST 2000 samples": "MNIST-2000",
        "Country Indicators 1999": "COUNTRY1999",
        "Country Indicators 2013": "COUNTRY2013",
        "Country Indicators 2014": "COUNTRY2014",
        "Country Indicators 2015": "COUNTRY2015",
        "Cars and Trucks 2004": "CARS04",
        "Breast Cancer Wisconsin (Diagnostic)": "BREAST-CANCER95",
        "Pima Indians Diabetes": "DIABETES",
        "Multidimensional Poverty Measures": "MPI"
    },
    value='',
    description='Dataset:',
)

# Surface selector: maps a readable label to the g_df column that is pivoted
# and drawn as the 3-D surface (passed to the callback as `key_to_pivot`).
pivotKeyX = widgets.Dropdown(
    options={
        'Negative log likelihood in low dim.': 'q_link',
        'Negative log likelihood in high dim.': 'p_link',
        'KL-loss': 'loss',
        'AUC R_NX': 'auc_rnx',
        'CorrCoef': 'pearsonr',
        'MDS Stress': 'mds_isotonic',
        'CCA Stress': 'cca_stress',
        'Sammon NLM': 'sammon_nlm'

    },
    value='q_link',
    description='Surface: ',
)

# Sliders for learning rate and perplexity. Their options are the distinct
# values found in the dataset loaded at start-up (g_df); they are not
# refreshed when another dataset is selected later.
_lr_options = g_df['learning_rate'].unique()
lrX = widgets.SelectionSlider(
    options=_lr_options,
    # Start at 100.0 when it is available; otherwise fall back to the first
    # option instead of failing on a value that cannot be selected.
    value=100.0 if 100.0 in _lr_options else _lr_options[0],
    # Raw string: '\l' is not a valid escape sequence in a normal literal.
    description=r'LR ($\lambda$): '
)

_perp_options = g_df['perplexity'].unique()
perpX = widgets.SelectionSlider(
    options=_perp_options,
    # Same fallback rule as for the learning rate slider.
    value=50.0 if 50.0 in _perp_options else _perp_options[0],
    description='Perplexity: '
)

# Axis-scale toggles (both start unchecked).
useLogXAxisX = widgets.Checkbox(value=False, description='Log scale in xAxis')
useLogYAxisX = widgets.Checkbox(value=False, description='Log scale in yAxis')

# Optional transformations for the lines tagged with [*] (both start unchecked).
showNegativeX = widgets.Checkbox(value=False, description='Negative stress func. [*]')
showInverseX = widgets.Checkbox(value=False, description='Inverse stress func. [*]')

# Column pickers for the two-line comparison; every column of g_df is offered.
line1NameX = widgets.Dropdown(description='Line1: ', options=g_df.columns)
line2NameX = widgets.Dropdown(description='Line2: ', options=g_df.columns)

In [25]:
# interactive plot

@interact(dataset_name=datasetX)
def select_dataset(dataset_name):
    """Load the selected dataset and show a metric surface for it.

    Replaces the shared ``g_df`` with the DataFrame of ``dataset_name``
    (``None`` when nothing is selected), then sets up a nested interactive
    view that lets the user pick which column is drawn as the surface.

    Args:
        dataset_name: Dataset identifier coming from the dataset dropdown.
    """
    global g_df
    g_df = load_df(dataset_name)

    @interact(key_to_pivot=pivotKeyX)
    def plot_surface_by_name(key_to_pivot='q_link'):
        """Pivot `key_to_pivot` by learning rate x perplexity and plot it."""
        if g_df is None:
            return

        # Rows: learning rate, columns: perplexity, cells: the chosen metric.
        pivot_df = g_df.pivot(
            index='learning_rate', columns='perplexity', values=key_to_pivot)

        # Pass the metric name as z-axis title too; it used to be left blank.
        _plot_surface(pivot_data=pivot_df, z_title=key_to_pivot, chart_title=key_to_pivot)

In [27]:
def plot_lines_by_key(dataset_name, lr, perp, use_log_x, use_log_y, show_inverse, show_neg):
    """Reload the dataset and draw both metric line charts.

    The shared ``g_df`` is replaced by the DataFrame of ``dataset_name``.
    One chart is then drawn with the learning rate fixed to ``lr`` and one
    with the perplexity fixed to ``perp``; the four flags are forwarded
    unchanged to both charts.
    """
    global g_df
    g_df = load_df(dataset_name)

    flags = (use_log_x, use_log_y, show_inverse, show_neg)
    for key_name, key_val in (('learning_rate', lr), ('perplexity', perp)):
        _plot_lines_by_key(key_name, key_val, *flags)
    
# Control panel: dataset and sliders on the first row, option checkboxes on the second.
ui = widgets.VBox([
    widgets.HBox([datasetX, lrX, perpX]),
    widgets.HBox([useLogXAxisX, useLogYAxisX, showInverseX, showNegativeX]),
])

# Re-run plot_lines_by_key whenever one of the bound widgets changes.
out = interactive_output(plot_lines_by_key, dict(
    dataset_name=datasetX,
    lr=lrX,
    perp=perpX,
    use_log_x=useLogXAxisX,
    use_log_y=useLogYAxisX,
    show_inverse=showInverseX,
    show_neg=showNegativeX,
))

display(ui, out)

In [28]:
# plot running time and the number of iterations

def _plot_runningtime(key_name, key_val):
    """Plot running time and iteration count for one fixed hyper-parameter value.

    Rows of the shared ``g_df`` whose ``key_name`` column equals ``key_val``
    are selected; the other hyper-parameter is used as the x axis. The
    ``running_time`` column is drawn as a line on the left y axis and the
    ``n_iter`` column as bars on a second y axis on the right.

    Args:
        key_name: ``'perplexity'`` or ``'learning_rate'`` - the column held fixed.
        key_val: Value of ``key_name`` to select.

    Returns:
        None. Nothing is drawn when no dataset is loaded (``g_df`` is None).
    """
    if g_df is None:
        return

    # Whichever hyper-parameter is not held fixed goes on the x axis.
    x_name = 'learning_rate' if key_name == 'perplexity' else 'perplexity'
    df_by_key = g_df[g_df[key_name] == key_val]
    x_data = df_by_key[x_name]

    time_trace = go.Scatter(
        x=x_data,
        y=df_by_key['running_time'],
        name='Running time'  # legend label (was misspelled 'Runnning time')
    )
    iter_trace = go.Bar(
        x=x_data,
        y=df_by_key['n_iter'],
        name='# of iterations',
        yaxis='y2'  # drawn against the secondary axis defined in the layout
    )
    layout = go.Layout(
        title='Running time and number of iterations [{} = {}]'.format(key_name, key_val),
        xaxis=dict(title='{}'.format('Learning rate' if key_name == 'perplexity' else 'Perplexity')),
        yaxis=dict(
            title='Running time (s)'
        ),
        yaxis2=dict(
            title='Number of iterations',
            titlefont=dict(
                color='orange'
            ),
            tickfont=dict(
                color='orange'
            ),
            overlaying='y',
            side='right'
        ),
        legend=dict(orientation="h"),
        autosize=False
    )
    fig = go.Figure(data=[time_trace, iter_trace], layout=layout)
    iplot(fig)

In [15]:
def plot_runningtime(dataset_name, lr, perp):
    """Reload the dataset and draw both running-time charts.

    The shared ``g_df`` is replaced by the DataFrame of ``dataset_name``.
    One chart is then drawn with the learning rate fixed to ``lr`` and one
    with the perplexity fixed to ``perp``.
    """
    global g_df
    g_df = load_df(dataset_name)

    for key_name, key_val in (('learning_rate', lr), ('perplexity', perp)):
        _plot_runningtime(key_name, key_val)
    
# Control panel for the running-time view: a single row with dataset and sliders.
ui2 = widgets.VBox([widgets.HBox([datasetX, lrX, perpX])])

# Re-run plot_runningtime whenever one of the bound widgets changes.
out2 = interactive_output(plot_runningtime, dict(
    dataset_name=datasetX,
    lr=lrX,
    perp=perpX,
))

display(ui2, out2)

In [16]:
# Compare two lines in the same graph.
# The learning rate is held fixed because it has little influence on the final result.

def _plot_two_lines(line1_name, line2_name, use_log_x, use_log_y, show_inverse, show_neg, lr=100.0):
    """Plot two columns of ``g_df`` against perplexity on separate y axes.

    Only the rows whose ``learning_rate`` equals ``lr`` are used. The first
    column is drawn on the left y axis and the second one on a right-hand
    axis, so that their shapes can be compared regardless of scale.

    Args:
        line1_name: Column drawn on the left axis.
        line2_name: Column drawn on the right axis.
        use_log_x: Plot the natural log of the perplexity.
        use_log_y: Plot the natural log of both columns.
        show_inverse: Replace the second line by ``1 / y`` (0 where ``y`` is 0).
            Applied after the optional log.
        show_neg: Negate the second line. Applied last.
        lr: Learning rate value used to select the rows.

    Returns:
        None. Nothing is drawn when no dataset is loaded (``g_df`` is None).
    """
    if g_df is None:
        return

    def _inverse(y):
        # Convert to a float array first: with an integer column (e.g. an
        # iteration count) an `out` buffer of the same dtype cannot hold 1 / y.
        values = np.asarray(y, dtype=float)
        return np.divide(1.0, values, out=np.zeros_like(values), where=(values != 0))

    df_by_key = g_df[g_df['learning_rate'] == lr]

    x_data = np.log(df_by_key['perplexity']) if use_log_x else df_by_key['perplexity']
    y1_data = np.log(df_by_key[line1_name]) if use_log_y else df_by_key[line1_name]
    y2_data = np.log(df_by_key[line2_name]) if use_log_y else df_by_key[line2_name]

    # The inverse / negative transformations are deliberately applied to the
    # second line only (see the message printed below); line 1 stays as is.
    if show_inverse:
        y2_data = _inverse(y2_data)
    if show_neg:
        y2_data = -y2_data

    trace1 = go.Scatter(
        x=x_data,
        y=y1_data,
        name=line1_name
    )
    trace2 = go.Scatter(
        x=x_data,
        y=y2_data,
        name=line2_name,
        yaxis='y2'  # drawn against the secondary axis defined in the layout
    )
    layout = go.Layout(
        title='Compare the shape of {} and {} [learning_rate = {}]'.format(line1_name, line2_name, lr),
        xaxis=dict(title='{} Perplexity'.format('Log of ' if use_log_x else '')),
        yaxis=dict(
            title=line1_name
        ),
        yaxis2=dict(
            title=line2_name,
            titlefont=dict(
                color='orange'
            ),
            tickfont=dict(
                color='orange'
            ),
            overlaying='y',
            side='right'
        ),
        legend=dict(orientation="h"),
        autosize=False
    )
    fig = go.Figure(data=[trace1, trace2], layout=layout)
    print("[*] Inverse function and Negative function are applied only for the `Line2`")
    iplot(fig)

In [17]:
def plot_two_lines(dataset_name, line1_name, line2_name, use_log_x, use_log_y, show_inverse, show_neg):
    """Reload the dataset and draw the two-line comparison chart.

    The shared ``g_df`` is replaced by the DataFrame of ``dataset_name``; the
    remaining arguments are forwarded unchanged, with the learning rate fixed
    to 100.0.
    """
    global g_df
    g_df = load_df(dataset_name)

    plot_args = (line1_name, line2_name, use_log_x, use_log_y, show_inverse, show_neg)
    _plot_two_lines(*plot_args, lr=100.0)
    
# Control panel for the comparison view: dataset and the two column pickers
# on the first row, option checkboxes on the second.
ui3 = widgets.VBox([
    widgets.HBox([datasetX, line1NameX, line2NameX]),
    widgets.HBox([useLogXAxisX, useLogYAxisX, showInverseX, showNegativeX]),
])

# Re-run plot_two_lines whenever one of the bound widgets changes.
out3 = interactive_output(plot_two_lines, dict(
    dataset_name=datasetX,
    line1_name=line1NameX,
    line2_name=line2NameX,
    use_log_x=useLogXAxisX,
    use_log_y=useLogYAxisX,
    show_inverse=showInverseX,
    show_neg=showNegativeX,
))

display(ui3, out3)

In [29]:
@interact(dataset_name=datasetX)
def plot_scatter(dataset_name):
    """Show the 2-D embedding of the selected dataset as a scatter plot.

    Reads ``<data_folder>/tsne_<dataset_name>.pkl`` and sets up a nested
    interactive view in which the learning rate and perplexity sliders select
    which stored embedding is displayed. Points are coloured by the values
    stored under the pickle's ``'target'`` key.

    Args:
        dataset_name: Dataset identifier coming from the dataset dropdown;
            nothing is shown for the empty string.
    """
    if dataset_name == "":
        return

    in_name = '{}/tsne_{}.pkl'.format(data_folder, dataset_name)
    # NOTE: unpickling can execute arbitrary code; only open trusted result files.
    # The context manager closes the file even if unpickling fails.
    with open(in_name, 'rb') as in_file:
        pkl_data = pickle.load(in_file)
    embeddeds = pkl_data['results']
    target_labels = pkl_data['target']
    # The 'mustlinks' / 'cannotlinnks' entries used to be read here as well but
    # were never used; the reads were dropped so that a missing (or differently
    # spelled) key can no longer abort the plot.

    @interact(lr=lrX, perp=perpX)
    def _plot_scatter(lr, perp):
        """Draw the embedding stored for the given (lr, perp) combination."""
        target = next(
            (item for item in embeddeds
             if item['learning_rate'] == lr and item['perplexity'] == perp),
            None)
        if target is None:
            # No stored run for this combination (e.g. the sliders were built
            # from another dataset): report it instead of raising StopIteration.
            print('No embedding found for learning_rate = {}, perplexity = {}'.format(lr, perp))
            return
        # assumes the embedding is a 2-D array with one point per row and at
        # least two columns - TODO confirm against the script producing the pickle
        X_embedded = target['embedding']

        trace1 = go.Scattergl(
            x=X_embedded[:, 0],
            y=X_embedded[:, 1],
            mode='markers',
            marker=dict(
                size=6,  # marker size is a number, not the string '6'
                color=target_labels,
                colorscale='Viridis',
                showscale=True
            )
        )
        # `autotick=True` was dropped from both axes: it is an obsolete
        # property and automatic tick placement is the default anyway.
        layout = go.Layout(
            autosize=False,
            width=600,
            height=600,
            xaxis=dict(
                autorange=True,
                showgrid=False,
                zeroline=False,
                showline=False,
                ticks='',
                showticklabels=False
            ),
            yaxis=dict(
                autorange=True,
                showgrid=False,
                zeroline=False,
                showline=False,
                ticks='',
                showticklabels=True
            )
        )
        fig = go.Figure(data=[trace1], layout=layout)
        iplot(fig)