In [None]:
import importlib
import os

import holoviews as hv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_bokeh
import plotly.express as px
import seaborn as sns
from bokeh.models import HoverTool
from holoviews import opts
from holoviews.plotting.links import DataLink
from pyhard import Measures, visualization

In [None]:
# Notebook-wide plotting configuration.
FIGURE_SIZE = (16, 10)  # default matplotlib figure size: (width, height)

# Apply seaborn's default theme first, then set the figure size on top of it.
sns.set()
plt.rcParams['figure.figsize'] = FIGURE_SIZE

# Route pandas_bokeh output to the notebook.
pandas_bokeh.output_notebook()

In [None]:
# Root directories of the input files.
#
# Each root can be overridden with an environment variable so the notebook can
# run on a machine other than the one it was written on; when the variable is
# not set, the original location is used unchanged.
#   INSTANCE_HARDNESS_DATA_DIR -> directory holding overlap.csv
#   MATILDA_WORKSPACE_DIR      -> directory holding the Matilda CSV files
data_path = os.path.realpath(
    os.environ.get(
        "INSTANCE_HARDNESS_DATA_DIR",
        "/Users/pedropaiva/Documents/Doutorado/instance-hardness/data/",
    )
)
matilda_path = os.path.realpath(
    os.environ.get(
        "MATILDA_WORKSPACE_DIR",
        "/Users/pedropaiva/Documents/Doutorado/matilda-workspace/",
    )
)

# Individual input files, all derived from the two roots above.
metadata_path = os.path.join(matilda_path, "metadata.csv")
feat_proc_path = os.path.join(matilda_path, "feature_process.csv")
overlap_path = os.path.join(data_path, "overlap.csv")

In [None]:
# Matilda metadata, indexed by its 'instances' column.
df_metadata = pd.read_csv(metadata_path, index_col='instances')

# Processed features: the 'Row' column becomes the index, which is then
# relabelled 'instances' to match the metadata index name.
df_feat_proc = pd.read_csv(feat_proc_path, index_col='Row').rename_axis('instances')

# Overlap dataset, read with pandas' default integer index.
df_overlap = pd.read_csv(overlap_path)

In [None]:
# Build the pyhard Measures object on the overlap data; the 'class' column is
# passed as the label column.
measures = Measures(
    df_overlap,
    labels_col='class',
)

In [None]:
# Compute every measure offered by the Measures object
# (presumably one row per instance -- confirm against pyhard).
df_measures = measures.calculate_all()

# Index-aligned left join: every row of df_overlap is kept and the measure
# columns are appended to it.
df = df_overlap.join(other=df_measures)

In [None]:
# Re-execute the already-imported pyhard `visualization` module in this kernel.
# NOTE(review): presumably a development convenience for picking up local edits
# to pyhard without restarting the kernel -- confirm it is still needed.
importlib.reload(visualization)

In [None]:
# Show pyhard's scatter widget for the overlap data joined with its measures.
visualization.scatter_widget(
    df,
    width=950,
    height=650,
)

# Matilda: 2-D projection of the processed features

In [None]:
# Load the projection matrix from the Matilda workspace. The location is built
# from `matilda_path` (defined with the other input paths) instead of repeating
# the absolute directory, so the workspace only has to be configured once.
df_proj_matrix = pd.read_csv(os.path.join(matilda_path, "projection_matrix.csv"))

# Keep every column except the first one and convert to a NumPy array.
# NOTE(review): the first column presumably holds row labels -- confirm.
proj_matrix = df_proj_matrix.iloc[:, 1:].to_numpy()

In [None]:
# Feature matrix to project: the processed features as a plain NumPy array.
# Alternative source kept for reference (the "feature_"-prefixed metadata columns):
#   features = df_metadata.filter(regex="^feature_").values
features = df_feat_proc.to_numpy()

In [None]:
# Project every feature row with the transposed projection matrix. The result
# must have exactly two columns, which are labelled z1 and z2 below.
proj = features @ proj_matrix.T

# Wrap the coordinates in a frame that carries the same index as the features.
df_is = pd.DataFrame(proj, index=df_feat_proc.index, columns=['z1', 'z2'])

In [None]:
# Quick look at the projected coordinates (z1 against z2).
px.scatter(
    df_is,
    x='z1',
    y='z2',
    width=800,
    height=500,
)

In [None]:
# Give the overlap rows the metadata's instance index so they can be joined with
# the projected coordinates. This is done on a re-indexed copy: overwriting
# `df_overlap.index` in place would change what the earlier
# `df_overlap.join(df_measures)` cell produces if it were re-run afterwards.
# NOTE(review): this assumes df_overlap rows are in the same order as
# df_metadata rows (the lengths must match or pandas raises) -- confirm.
overlap_by_instance = df_overlap.set_index(df_metadata.index)

# Index-aligned left join: one row per projected instance.
data = df_is.join(overlap_by_instance)

# Store the class label as a string; it is used to colour the scatter plots below.
data['class'] = data['class'].astype(str)

In [None]:
# Side-by-side scatter plots of the same instances: V1 vs V2 on the left and the
# projected coordinates z1 vs z2 on the right.
# `DataLink` and `HoverTool` are imported in the notebook's import cell.
hv.extension('bokeh')

# Options shared by both scatter plots; points are coloured by class label.
scatter_style = dict(width=450, height=500, color='class', cmap='Set1', show_grid=True)

scatter1 = hv.Scatter(data, 'V1', vdims=['V2', 'class']).opts(**scatter_style)
scatter2 = hv.Scatter(data, 'z1', vdims=['z2', 'class']).opts(**scatter_style)

# Hover tooltip listing both coordinate pairs and the class label.
tooltips = [
    ('V1', '@V1'),
    ('V2', '@V2'),
    ('z1', '@z1'),
    ('z2', '@z2'),
    ('Class', '@class')
]
hover = HoverTool(tooltips=tooltips)

# Link the data of the two elements; the link is created by constructing the
# object, the returned handle is not used further in this cell.
dlink = DataLink(scatter1, scatter2)

# `scatter1 + scatter2` is already a Layout, so it is arranged directly.
layout = (scatter1 + scatter2).cols(2)
layout.opts(
    opts.Scatter(tools=['box_select', 'lasso_select', hover], size=4),
    opts.Layout(shared_axes=True, shared_datasource=True),
)
layout