# Interact API for data analysis
> Jupyter compatible analysis frontend

* We seriously considered TensorBoard. We admire its comprehensive visualization options covering most ML/DL tasks, and it is embeddable in a notebook environment, but with this many line traces it just won't work in this scenario.
* We do have a UI that runs on Flask, but with the popularity of Colab and Kaggle kernels (bless the free GPU), we think it is necessary to have decent UI widgets for Jupyter.
* And I just came across ```ipywidgets.interact```; with its syntactic sugar, it is so liberating.

In [3]:
# default_exp visual

In [77]:
# export
from ipywidgets import interact,interact_manual
import pandas as pd
from torchember.utils import emberReader,get_ember_record
import json
from IPython.display import display,HTML
import plotly.graph_objects as go

In [172]:
# export
def scatter_cols(module_df, cols=("min", "max", "mean")):
    """
    Plot the chosen statistic columns of ``module_df`` against ``ts``.

    One line trace is added per (column, tname) pair, named "<col> <tname>".
    The figure title is the ``module`` value of the first row, or an empty
    string when ``module_df`` has no rows. The figure is shown, not returned.

    module_df: DataFrame with ``tname``, ``ts`` and ``module`` columns plus
        every column named in ``cols``
    cols: names of the columns to plot; a name given more than once is
        plotted only once
    """
    df = module_df
    # Drop repeated names, keeping first-seen order: the former default listed
    # "max" twice, which drew every max trace (and its legend entry) twice.
    cols = list(dict.fromkeys(cols))
    # Slice once per tensor name instead of once per (column, tensor name).
    per_tname = {tname: df[df.tname == tname] for tname in df.tname.unique()}
    fig = go.Figure()
    for col in cols:
        for tname, sub_df in per_tname.items():
            fig.add_trace(go.Scatter(x=sub_df.ts,
                                     y=sub_df[col],
                                     line=dict(width=4),
                                     name=f"{col} {tname}"))
    # An empty frame has no first row to read the title from.
    module = df.module.iloc[0] if len(df) > 0 else ""
    fig.update_layout(title=f"{module}",
                      yaxis_title="Value",
                      xaxis_title="Time",
                      )
    fig.show()


To experiment, run choose_data() first

In [164]:
# Plot only the rows whose ttype is "grad" from the module currently selected
# on `vis` (the VisualByTensor instance created in a later cell).
scatter_cols(vis.module_df[vis.module_df.ttype=="grad"])

## Visualize with scatter plot

#### Ugly prototype
To run interactive filter steps, I can use function returning function

In [173]:
def choose_data():
    """
    Prototype widget chain built from nested closures.

    Each level offers its choices as the default value of a keyword argument
    handed to ipywidgets: task -> log file -> (module, tensor type). The final
    selection is plotted with scatter_cols and is also stored in the global
    ``ember_sub_df``.
    """
    latest_tasks = [record["name"] for record in get_ember_record()][:20]

    def choose_task(task = latest_tasks):
        er = emberReader(task)

        def choose_log(log_file = er.t.log_files):
            html(f"<div>Log file:\t<code>{log_file}</code> selected</div>")
            records = json.loads(er.read_log(log_file))
            df = pd.DataFrame(records)

            def choose_plot(module = df.module.unique(),
                            ttype = df.ttype.unique(),
                           ):
                # the filtered frame is published as a global for later cells
                global ember_sub_df
                stat_cols = ["mean","std","max","min","zero_pct"]
                by_module = df[df.module == module]
                ember_sub_df = by_module[by_module.ttype == ttype]
                html("<h3>Data selected</h3>")
                html(f"<h4>Module:\t{module}</h4>")
                html(f"<h4>Tensor Type:\t{ttype}</h4>")
                if len(ember_sub_df) == 0:
                    html(f"""<div>No such selection under <strong>{module}</Strong> 
                    <br><strong>{ttype}</Strong></div>""")
                else:
                    scatter_cols(ember_sub_df, stat_cols)
                return ember_sub_df

            interact_manual(choose_plot)

        interact_manual(choose_log)

    html("<h2>Choose from task names</h2>")
    interact(choose_task)

Testing on the crude prototype

In [174]:
# Start the closure-based prototype; it renders the task dropdown first.
choose_data()

interactive(children=(Dropdown(description='task', options=('tinyVGG_20200411_151952', 'Sequential_20200328_00…

It works, but this way of doing it is more than a self-pretentious Python coder can take

## Visualize Statistics
> OOP improved

In [222]:
# export
def html(x):
    """Display the markup string ``x`` as rendered HTML in the cell output."""
    rendered = HTML(x)
    display(rendered)

class Visualize:
    """
    Shared plumbing for the interactive viewers.

    Holds the task names to offer, builds the task-selection callback and
    loads a chosen log file into a DataFrame. Subclasses supply the steps that
    follow the task selection.
    """
    def __init__(self):
        # names of the first 20 records returned by get_ember_record()
        records = get_ember_record()
        self.latest_tasks = [record["name"] for record in records][:20]

    def choose_task_(self, build_later):
        """
        Create the task-selection callback and store it as ``self.choose_task``.

        build_later: zero-argument callable, invoked after a task is chosen;
            whatever it returns is handed to ``interact_manual``
        """
        def choose_task(task = self.latest_tasks):
            reader = emberReader(task)
            self.er = reader
            self.log_files = reader.t.log_files
            # the next step is built only now, once self.log_files exists
            interact_manual(build_later())

        self.choose_task = choose_task

    def build_df(self, log_file):
        """
        Read ``log_file`` through ``self.er`` into ``self.df`` and record the
        distinct ``module`` and ``ttype`` values it contains.
        """
        html(f"<div>Log file:\t<code>{log_file}</code> selected</div>")
        raw = self.er.read_log(log_file)
        frame = pd.DataFrame(json.loads(raw))
        self.df = frame
        self.modules = frame.module.unique()
        self.ttypes = frame.ttype.unique()
    
class VisualByTensor(Visualize):
    """
    Viewer that walks task -> log file -> (module, tensor type) and plots the
    statistics of the selected rows with scatter_cols.

    After a selection, ``module_df`` holds the rows of the chosen module and
    ``ember_sub_df`` the rows of that module with the chosen tensor type.
    """
    def __init__(self):
        super().__init__()
        # wire the chain: the step after the task choice is the log chooser
        self.choose_task_(self.choose_log_)
        interact(self.choose_task)

    def choose_log_(self):
        """Build, store and return the log-file selection callback."""
        def choose_log(log_file = self.log_files):
            self.build_df(log_file)
            # the plot chooser is rebuilt so its options match the new frame
            self.choose_plot_()
            interact_manual(self.choose_plot)

        self.choose_log = choose_log
        return choose_log

    def choose_plot_(self):
        """Build the module / tensor-type callback as ``self.choose_plot``."""
        def choose_plot(module = self.modules,
                        ttype = self.ttypes,
                       ):
            stat_cols = ["mean","std","max","min","zero_pct"]
            by_module = self.df[self.df.module == module]
            self.module_df = by_module
            selection = by_module[by_module.ttype == ttype]
            self.ember_sub_df = selection
            html("<h3>Data selected</h3>")
            html(f"<h4>Module:\t{module}</h4>")
            html(f"<h4>Tensor Type:\t{ttype}</h4>")
            if len(selection) == 0:
                html(f"""<div>No such selection under <strong>{module}</Strong> 
                    <br><strong>{ttype}</Strong></div>""")
            else:
                scatter_cols(selection, stat_cols)
            return selection

        self.choose_plot = choose_plot

Testing it

In [223]:
# Instantiating calls interact() inside __init__, which renders the task dropdown.
vis = VisualByTensor()

interactive(children=(Dropdown(description='task', options=('tinyVGG_20200411_151952', 'Sequential_20200328_00…

## Useful attributes

In [182]:
# 5 random rows of the full log frame; `vis.df` is set by build_df once a log file is chosen.
vis.df.sample(5)

Unnamed: 0,shape,mean,std,max,min,cnt_zero,zero_pct,module,ts,ttype,tname
169164,"[16, 64, 3, 3]",0.819615,0.843879,6.169227,-0.024417,0,0.0,model(tinyVGG).features(Sequential),2020-04-11 15:23:16,output_dt,output
32768,[16],1.00003,0.007097,1.020213,0.988215,0,0.0,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:20:48,weight,weight_0
46445,[32],0.000741,0.011984,0.024205,-0.019127,0,0.0,model(tinyVGG).features(Sequential).1(vggBlock...,2020-04-11 15:21:02,weight,weight_1
199958,"[16, 32, 14, 14]",-0.206816,0.708087,4.319022,-3.673861,0,0.0,model(tinyVGG).features(Sequential).1(vggBlock...,2020-04-11 15:23:50,input_dt,input
193157,"[16, 256]",0.401141,0.616993,3.168123,0.0,2354,0.574707,model(tinyVGG).fcb(Sequential).2(ReLU),2020-04-11 15:23:42,output_dt,output


In [183]:
# 5 random rows of the last module / tensor-type selection; set by choose_plot.
vis.ember_sub_df.sample(5)

Unnamed: 0,shape,mean,std,max,min,cnt_zero,zero_pct,module,ts,ttype,tname
66566,"[16, 1, 3, 3]",0.006638774,0.02471338,0.1447885,-0.03399264,0,0.0,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:21:25,grad,grad_0
82513,[16],-1.238978e-08,5.706841e-08,6.137452e-08,-1.45094e-07,0,0.0,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:21:42,grad,grad_1
188775,[16],-1.384394e-09,5.088204e-08,8.774759e-08,-1.541121e-07,0,0.0,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:23:38,grad,grad_1
7338,"[16, 1, 3, 3]",-0.00696597,0.05473364,0.2185708,-0.1482361,0,0.0,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:20:19,grad,grad_0
52630,"[16, 1, 3, 3]",0.004181406,0.02578446,0.08739539,-0.105005,0,0.0,model(tinyVGG).features(Sequential).0(vggBlock...,2020-04-11 15:21:09,grad,grad_0


## Visualize Movement

In [312]:
# export
def moving_track(x):
    """
    Return the mean of the absolute values of ``x``, i.e. ``np.mean(abs(x))``.

    x: anything that supports ``abs()`` and can be averaged by ``np.mean``
       (a number, a numpy array, a pandas Series)
    """
    # numpy is not among this module's top-level imports, so import it here;
    # without this the call fails with NameError on ``np``.
    import numpy as np
    return np.mean(abs(x))

def clean_module_name(x):
    """
    Shorten a dotted module path for display.

    Every dot-separated segment except the last is cut at its first "(";
    the last segment is kept whole, and the pieces are joined with ">".
    A name that contains no "." is returned unchanged.
    """
    *parents, leaf = x.split(".")
    if not parents:
        return x
    short_parents = [segment.split("(")[0] for segment in parents]
    return ">".join(short_parents + [leaf])

class VisualMovement(Visualize):
    """
    Viewer that ranks tensors by how much their statistics moved.

    Walks task -> log file -> tensor type. For the chosen tensor type it shows,
    for "mean" and "std", the 6 rows with the largest and the 6 rows with the
    smallest ``moving_track`` aggregate.

    After a log is chosen ``shifted`` holds the aggregated frame; after a
    tensor type is chosen ``sub_df`` holds its rows for that type.
    """
    def __init__(self):
        super().__init__()
        # wire the chain: the step after the task choice is the log chooser
        self.choose_task_(self.choose_log_)

        interact(self.choose_task)

    def choose_log_(self):
        """Build, store and return the log-file selection callback."""
        def choose_log(log_file = self.log_files):
            self.build_df(log_file)
            self.build_shift()
            self.choose_tensor_()
            interact_manual(self.choose_tensor)

        self.choose_log = choose_log
        return self.choose_log

    def choose_tensor_(self):
        """Build the tensor-type callback and store it as ``self.choose_tensor``."""
        # The dict default is deliberate: it is the value handed to interact,
        # mapping display labels to ttype values.
        def choose_tensor(ttype = {"Weights":"weight","Gradients":"grad","Outputs":"output_dt"}):
            # .copy() so the column assignment below writes to an independent
            # frame rather than a slice of self.shifted (SettingWithCopyWarning).
            self.sub_df = self.shifted[self.shifted.ttype==ttype].copy()
            self.sub_df["module"] = self.sub_df.module.apply(clean_module_name)

            # option_context restores both display options on exit, even when
            # rendering raises. max_colwidth=None (instead of the deprecated -1)
            # turns off truncation of the module names.
            with pd.option_context("display.float_format", lambda x:"%.5f"%(x),
                                   "display.max_colwidth", None):
                for col in ["mean","std"]:
                    html(f"<h3>{ttype}/{col} movements</h3>")
                    # largest values first, then smallest values
                    for rank, ascending in (("top", False), ("least", True)):
                        html(f"""<blockquote>{ttype} 
                        <strong>{col}</strong> {rank} accumulated changes
                        </blockquote>""")
                        display(self.sub_df
                                .sort_values(by = col, ascending=ascending)
                                .head(6)
                                [["module","tname",col]])
        self.choose_tensor = choose_tensor

    def build_shift(self):
        """
        statistics on tensor shifting

        Groups ``self.df`` by (module, ttype, tname) and aggregates each
        statistic column with ``moving_track``. The result is stored in
        ``self.shifted`` and returned.
        """
        keys = ["module","ttype","tname"]
        stat_cols = ["mean","std","min","max","zero_pct"]
        # Select the statistic columns before aggregating so moving_track is
        # never applied to the non-numeric columns (e.g. shape, ts).
        self.shifted = self.df.groupby(keys)[stat_cols]\
                .agg(moving_track)\
                .reset_index()[keys + stat_cols]
        return self.shifted

In [313]:
# Instantiating calls interact() inside __init__, which renders the task dropdown.
vis_m = VisualMovement()

interactive(children=(Dropdown(description='task', options=('tinyVGG_20200411_151952', 'Sequential_20200328_00…