# Interact API for data analysis
> Jupyter compatible analysis frontend

* We seriously considered TensorBoard; we admire its comprehensive visualization options, which target most ML/DL tasks, and it's embeddable in a notebook environment. But with this many line traces, it just won't work in this scenario.
* We do have a UI running on Flask, but with the popularity of Colab and Kaggle kernels (bless the free GPU), we think it's necessary to have decent UI widgets for Jupyter.
* And I just came across ```ipywidgets.interact``` and its syntactic sugar, so liberating

In [1]:
# default_exp visual

In [2]:
# export
from ipywidgets import interact,interact_manual
import pandas as pd
from torchember.utils import emberReader,get_ember_record
import json
from IPython.display import display,HTML
import plotly.graph_objects as go

# Detect a Google Colab runtime: the google.colab package is expected to be
# importable only there, so a failed import is taken as "not on Colab".
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    # Catch only the import failure; a bare `except:` would also swallow
    # KeyboardInterrupt / SystemExit raised while importing.
    IN_COLAB = False

To experiment, run choose_data() first

## Visualize with scatter plot

#### Ugly prototype
To run the interactive filter steps, I can use functions that define further functions

In [3]:
def choose_data():
    """
    Prototype: render a chain of nested ipywidgets controls
    (task -> log file -> module / tensor type) and plot the selection.

    Each step is a nested function whose default argument carries the
    available options; the function is handed to interact / interact_manual.

    NOTE(review): `html` is defined in a later cell of this notebook, and
    `scatter_cols` is not defined at module level anywhere in the visible
    source (only as a method of VisualByTensor) -- confirm where it is
    expected to come from before relying on this prototype.
    """
    # first 20 task names returned by get_ember_record()
    latest_tasks = list(i["name"] for i in get_ember_record())[:20]
    def choose_task(task = latest_tasks):
        """Open a reader for the chosen task and offer its log files."""
        er = emberReader(task)
        def choose_log(log_file = er.t.log_files):
            """Load the chosen log into a DataFrame and offer module / ttype filters."""
            html(f"<div>Log file:\t<code>{log_file}</code> selected</div>")
            # the log content is parsed as JSON and fed to DataFrame
            df = pd.DataFrame(json.loads(er.read_log(log_file)))
            def choose_plot(module = df.module.unique(), 
                            ttype = df.ttype.unique(), 
                           ):
                """Filter df by module and tensor type, plot it, and return the filtered rows."""
                cols = ["mean","std","max","min","zero_pct"]
                module_df = df[df.module == module]
                # the selection is published as a module-level global so it can
                # be inspected from other cells after the widget has run
                global ember_sub_df
                ember_sub_df =  module_df[module_df.ttype == ttype]
                html(f"<h3>Data selected</h3>")
                html(f"<h4>Module:\t{module}</h4>")
                html(f"<h4>Tensor Type:\t{ttype}</h4>")
                if len(ember_sub_df)>0:
                    # NOTE(review): free function `scatter_cols` is not visible
                    # in this file -- this call may raise NameError; confirm
                    scatter_cols(ember_sub_df,cols)
                else:
                    html(f"""<div>No such selection under <strong>{module}</Strong> 
                    <br><strong>{ttype}</Strong></div>""")
                return ember_sub_df
            interact_manual(choose_plot)
        # NOTE(review): sub_df is assigned but never used afterwards
        sub_df = interact_manual(choose_log)
    html("<h2>Choose from task names</h2>")
    interact(choose_task)

Testing on the crude prototype

In [None]:
# Renders the heading and the task dropdown; the follow-up controls are
# created by the nested steps inside choose_data.
choose_data()

It works, but this way of doing it is more than a self-pretentious Python coder can take

## Visualize Statistics
> OOP improved

In [4]:
# export
def html(x):
    """
    Show the markup string x as rich HTML output
    """
    rendered = HTML(x)
    display(rendered)

class Visualize:
    """
    Base class of the interactive visualizers.
    Holds the task list, the reader of the chosen task and
    the DataFrame of the chosen log file.
    """
    def __init__(self):
        # keep the first 20 task names returned by get_ember_record()
        records = get_ember_record()
        self.latest_tasks = [record["name"] for record in records][:20]

    def choose_task_(self, build_later):
        """
        Create the task-selection step and store it as self.choose_task
        build_later: callable returning the function of the next step,
            it is only called once a task has been picked
        """
        def choose_task(task = self.latest_tasks):
            reader = emberReader(task)
            self.er = reader
            self.log_files = reader.t.log_files
            # build the next step now that self.log_files exists
            interact_manual(build_later())

        self.choose_task = choose_task

    def build_df(self, log_file):
        """
        Read log_file through the current reader into self.df,
        and collect the unique modules / tensor types found in it
        """
        html(f"<div>Log file:\t<code>{log_file}</code> selected</div>")
        raw_log = self.er.read_log(log_file)
        frame = pd.DataFrame(json.loads(raw_log))
        self.df = frame
        self.modules = frame.module.unique()
        self.ttypes = frame.ttype.unique()
    
class VisualByTensor(Visualize):
    """
    Interactive scatter plots of tensor statistics.
    The widget chain is: task -> log file -> module / tensor type -> plot.
    Instantiating the class renders the first widget.
    """
    def __init__(self):
        super().__init__()
        # define the chain by handing over the builder of the next step
        self.choose_task_(self.choose_log_)
        interact(self.choose_task)

    def choose_log_(self):
        """
        Create the log-file step, store it as self.choose_log and return it.
        Reads self.log_files, so a task has to be chosen first.
        """
        def choose_log(log_file = self.log_files):
            self.build_df(log_file)
            # the plot step needs self.modules / self.ttypes from build_df
            self.choose_plot_()
            interact_manual(self.choose_plot)

        self.choose_log = choose_log
        return self.choose_log

    def choose_plot_(self):
        """
        Create the plotting step and store it as self.choose_plot.
        The step filters self.df by module and tensor type, plots the
        selection and returns the filtered rows (also kept in
        self.ember_sub_df).
        """
        def choose_plot(module = self.modules,
                        ttype = self.ttypes,
                        ):
            cols = ["mean","std","max","min","zero_pct"]
            self.module_df = self.df[self.df.module == module]
            self.ember_sub_df = self.module_df[self.module_df.ttype == ttype]
            html(f"<h3>Data selected</h3>")
            html(f"<h4>Module:\t{module}</h4>")
            html(f"<h4>Tensor Type:\t{ttype}</h4>")
            if len(self.ember_sub_df)>0:
                # scatter_cols shows the figure itself and returns None,
                # wrapping it in display() only printed a stray "None"
                self.scatter_cols(self.ember_sub_df,cols)
            else:
                html(f"""<div>No such selection under <strong>{module}</Strong> 
                    <br><strong>{ttype}</Strong></div>""")
            return self.ember_sub_df

        self.choose_plot = choose_plot

    def scatter_cols(self,module_df,cols = ("min","max","mean","std")):
        """
        Scatter plot the given columns of module_df against its ts column,
        one trace per (column, tname) pair, and show the figure.
        module_df: DataFrame with ts, tname, module and the columns in cols
        cols: names of the columns to plot
            (the former default listed "max" twice and was a mutable list)
        Returns None, the figure is rendered as a side effect.
        """
        df = module_df
        fig = go.Figure()
        for col in cols:
            for tname in df.tname.unique():
                sub_df = df[df.tname==tname]
                fig.add_trace(go.Scatter(x = sub_df.ts,
                                         y = sub_df[col],
                                         line=dict(width=4),
                                         name = f"{col} {tname}"))
        # the title is the module of the first row; an empty frame gets an
        # empty title instead of raising IndexError
        module = df.module.iloc[0] if len(df)>0 else ""
        fig.update_layout(title=f"{module}",
                          yaxis_title = "Value",
                          xaxis_title = "Time",
                          )
        if IN_COLAB:
            # an explicit renderer is requested when running on colab
            fig.show(renderer="colab")
        else:
            fig.show()


Testing it

In [5]:
# Instantiating is enough: __init__ wires the steps together and calls
# interact() to render the task dropdown.
vis = VisualByTensor()

interactive(children=(Dropdown(description='task', options=('tinyVGG_20200411_151952', 'Sequential_20200328_00…

## Useful attributes

In [6]:
# vis.df is set by build_df() once a log file has been selected in the widget
vis.df.sample(5)

Unnamed: 0,shape,mean,std,max,min,cnt_zero,zero_pct,module,ts,ttype,tname
202605,[10],1.169974e-09,2.17351e-09,5.122274e-09,-1.920853e-09,1,0.1,model(tinyVGG).fcb(Sequential).3(Linear),2020-04-11 15:23:53,grad,grad_1
147485,[32],-0.008100387,0.02281245,0.04030757,-0.06566864,0,0.0,model(tinyVGG).features(Sequential).1(vggBlock...,2020-04-11 15:22:52,weight,weight_1
76479,[10],0.009724541,0.03381424,0.04893364,-0.06155158,0,0.0,model(tinyVGG).fcb(Sequential).3(Linear),2020-04-11 15:21:36,weight,weight_1
96134,"[16, 64, 3, 3]",0.8534161,0.8471205,5.928071,-0.02168639,0,0.0,model(tinyVGG).features(Sequential),2020-04-11 15:21:57,output_dt,output
190301,"[16, 16, 14, 14]",0.5344651,0.7085245,7.341032,-0.03886062,0,0.0,model(tinyVGG).features(Sequential).1(vggBlock...,2020-04-11 15:23:39,input_dt,input


In [7]:
# vis.ember_sub_df holds the rows of the last module / tensor type selection
vis.ember_sub_df.sample(5)

Unnamed: 0,shape,mean,std,max,min,cnt_zero,zero_pct,module,ts,ttype,tname
106230,"[16, 1, 3, 3]",0.0081533,0.03022903,0.09953728,-0.08875045,0,0.0,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:22:09,grad,grad_0
203916,"[16, 1, 3, 3]",0.001536577,0.007859792,0.0295296,-0.01787023,0,0.0,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:23:56,grad,grad_0
196011,[16],7.799354e-10,2.460518e-08,8.180201e-08,-2.929983e-08,1,0.0625,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:23:46,grad,grad_1
116817,[16],4.134011e-08,7.885138e-08,2.931062e-07,-8.208161e-08,0,0.0,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:22:21,grad,grad_1
202711,[16],-1.532726e-09,4.049379e-08,7.313577e-08,-8.546601e-08,1,0.0625,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:23:53,grad,grad_1


## Visualize Movement

In [10]:
# export
import numpy as np
def moving_track(x):
    """
    Mean of the absolute values of x
    """
    magnitudes = abs(x)
    return np.mean(magnitudes)

def clean_module_name(x):
    """
    Shorten a dotted module path for display:
    every part but the last loses its "(...)" suffix,
    and the parts are joined with ">".
    A name without any "." is returned as it is.
    """
    *parents, leaf = x.split(".")
    if not parents:
        return x
    short_parents = [part.partition("(")[0] for part in parents]
    return ">".join(short_parents + [leaf])

class VisualMovement(Visualize):
    """
    Interactive ranking of tensors by how large their statistics are.
    The widget chain is: task -> log file -> tensor type -> ranking tables.
    Instantiating the class renders the first widget.
    """
    def __init__(self):
        super().__init__()
        # define the chain by handing over the builder of the next step
        self.choose_task_(self.choose_log_)

        interact(self.choose_task)

    def choose_log_(self):
        """
        Create the log-file step, store it as self.choose_log and return it.
        Reads self.log_files, so a task has to be chosen first.
        """
        def choose_log(log_file = self.log_files):
            self.build_df(log_file)
            self.build_shift()
            self.choose_tensor_()
            interact_manual(self.choose_tensor)

        self.choose_log = choose_log
        return self.choose_log

    def choose_tensor_(self):
        """
        Create the tensor-type step and store it as self.choose_tensor.
        For the "mean" and "std" columns the step displays the 6 largest
        and the 6 smallest rows of self.shifted for the chosen tensor type.
        """
        def choose_tensor(ttype = {"Weights":"weight","Gradients":"grad","Outputs":"output_dt"}):
            # copy the filtered rows so rewriting the module column works on
            # an independent frame (no SettingWithCopyWarning, self.shifted
            # stays untouched)
            self.sub_df = self.shifted[self.shifted.ttype==ttype].copy()
            self.sub_df["module"] = self.sub_df.module.apply(clean_module_name)

            # The display options are scoped to this block and restored even
            # if rendering raises. None means "do not truncate columns"; the
            # former -1 is deprecated since pandas 1.0.
            with pd.option_context("display.float_format", lambda x:"%.5f"%(x),
                                   "display.max_colwidth", None):
                for col in ["mean","std"]:
                    html(f"<h3>{ttype}/{col} movements</h3>")
                    html(f"""<blockquote>{ttype} 
                        <strong>{col}</strong> top accumulated changes
                        </blockquote>""")
                    display(self.sub_df\
                            .sort_values(by = col, ascending=False)\
                            .head(6)\
                            [["module","tname",col]])
                    html(f"""<blockquote>{ttype} 
                        <strong>{col}</strong> least accumulated changes
                        </blockquote>""")
                    display(self.sub_df\
                            .sort_values(by = col, ascending=True)\
                            .head(6)\
                            [["module","tname",col]])

        self.choose_tensor = choose_tensor

    def build_shift(self):
        """
        statistics on tensor shifting
        Groups self.df by module / ttype / tname and aggregates the
        statistic columns with moving_track.
        The result is stored in self.shifted and returned.
        """
        keys = ["module","ttype","tname"]
        stats = ["mean","std","min","max","zero_pct"]
        # Aggregate only the statistic columns: the other columns (e.g.
        # shape, ts) are not numeric and were dropped from the result anyway,
        # and newer pandas raises when abs() fails on them.
        self.shifted = self.df.groupby(keys)[stats]\
                .agg(moving_track)\
                .reset_index()[keys + stats]
        return self.shifted

In [11]:
# Instantiating is enough: __init__ wires the steps together and calls
# interact() to render the task dropdown.
vis_m = VisualMovement()

interactive(children=(Dropdown(description='task', options=('tinyVGG_20200411_151952', 'Sequential_20200328_00…