# Torch Ember Core
> Analyzing How Model Improves

In [1]:
# default_exp core

In this tutorial, we'll use AlexNet as an example.
We can load AlexNet from ```torchvision```.

By:
* Xiaochen Zhang
* Lai Wei

In [2]:
from torchvision.models.alexnet import AlexNet
import torch


In [3]:
model = AlexNet()

#### Sample data
Create some sample data: a batch of 2 random tensors standing in for normalized images, each with 3 channels and size 224 x 224

In [4]:
# torch.rand draws from [0, 1), so (rand - 1) * 2 lies in [-2, 0)
# NOTE(review): if a zero-centred [-1, 1) range was intended this would be (rand - 0.5) * 2 — confirm
samp = (torch.rand(2,3,224,224)-1)*2

## File, log, data, directory handler

In [10]:
# export
import os
from pathlib import Path
import json
import pandas as pd
from datetime import datetime
import torch
from torchember.helper import color
from functools import partial

class tracker(object):
    """
    File, log and JSON-data handler rooted at ``$HOME/.<libname>``.

    Directory layout created on construction:
        $HOME/.<libname>/data/            JSON documents (see __setitem__ / __getitem__)
        $HOME/.<libname>/log/<fname>/     one ``.log`` file per combination of marks

    Every log file holds one JSON object per line.
    """
    def __init__(self, libname, fname):
        """
        libname: name of the hidden directory under the home directory
        fname: name of the log sub-directory for this tracker
        """
        self.libname = libname
        self.fname = fname
        # honour $HOME when it is set, otherwise fall back to the platform home directory
        self.home = Path(os.environ['HOME']) if 'HOME' in os.environ else Path.home()
        self.dir = self.home/f".{libname}"
        self.dir.mkdir(parents = True, exist_ok = True)
        self.data = self.dir/"data"
        self.data.mkdir(exist_ok = True)
        self.log = self.dir/"log"
        self.log.mkdir(exist_ok = True)
        self.log_path = self.log/self.fname
        self.log_path.mkdir(exist_ok=True)
        self.marked = {}
        # the initial mark also sets self.log_file
        self.mark(init="00")

    def __repr__(self):
        return f"<{self.libname}:{self.fname}>"

    def mkdir(self, path):
        """Create the directory ``path`` if it does not exist yet."""
        Path(path).mkdir(exist_ok=True)

    def __setitem__(self, fname,dict_):
        """Save ``dict_`` as ``<data dir>/<fname>.json``."""
        with open(self.data/f"{fname}.json","w") as f:
            f.write(json.dumps(dict_, indent = 2))

    def __getitem__(self,fname):
        """Load and return the content of ``<data dir>/<fname>.json``."""
        # context manager so the handle is closed deterministically
        with open(self.data/f"{fname}.json","r") as f:
            return json.loads(f.read())

    def logging(self,line):
        """Append ``line`` to the current log file and return the log file path."""
        with open(self.log_file,"a") as f:
            f.write(line+"\n")
        return self.log_file

    def mark(self,**kwargs):
        """
        Update the marks and switch to the log file named after them,
        e.g. marks {init:"00", epoch:1} -> ``init-00_epoch-1.log``
        """
        self.marked.update(kwargs)
        file_name = "_".join(f"{k}-{v}" for k,v in self.marked.items())
        self.log_file = self.log_path/f"{file_name}.log"

    def __call__(self,dict_):
        """
        add a dictionary to log
        """
        self.logging(json.dumps(dict_))
        return self

    def lines(self):
        """
        Return the records of the current log file as a list of dictionaries.
        An empty list is returned when nothing has been logged to this file yet.
        """
        if not Path(self.log_file).exists():
            return []
        with open(self.log_file) as f:
            # skip blank lines; a missing trailing newline no longer drops the last record
            return [json.loads(row) for row in f if row.strip()]

    @property
    def ts(self):
        """Current time formatted as ``YYYY-mm-dd HH:MM:SS``."""
        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    @property
    def df(self):
        """The records of the current log file as a pandas DataFrame."""
        return pd.DataFrame(self.lines())
    
class emberTracker(tracker):
    """
    ``tracker`` for the "torchember" library that additionally keeps a
    ``<fname>_latest`` file holding only the lines logged since the last refresh.
    """
    def __init__(self, fname):
        super().__init__("torchember",fname)
        self.latest = self.log/f"{fname}_latest"
        # buffer of lines logged since the last refresh()
        self.latest_lines = ""

    def logging(self,line):
        """Append ``line`` to the current log file and to the latest-lines buffer."""
        log_file = super().logging(line)
        self.latest_lines+=(line+"\n")
        return log_file

    def refresh(self):
        """
        Write the buffered lines to the latest file (overwriting it) and clear the buffer,
        so the latest file always contains the records of the latest batch
        """
        with open(self.latest,"w") as f:
            f.write(self.latest_lines)
        self.latest_lines = ""
        return self.latest

    def latest_line_list(self):
        """
        Return the records of the latest file as a list of dictionaries.
        An empty list is returned when refresh() has not written the file yet.
        """
        if not Path(self.latest).exists():
            return []
        with open(self.latest) as f:
            # skip blank lines; the handle is closed when the block exits
            return [json.loads(row) for row in f if row.strip()]

    @property
    def latest_df(self):
        """The records of the latest file as a pandas DataFrame."""
        return pd.DataFrame(self.latest_line_list())
        

### Test on the data tracker

In [11]:
etrack = emberTracker("testmodel")

This is how we record data from within other code

In [12]:
for i in range(10):
    etrack({"col1":i,"col2":i*2,"col3":"hahahha"})

Preview log file

In [13]:
!cat {etrack.log_path}

cat: /Users/salvor/.torchember/log/testmodel: Is a directory


Read log as dataframe

In [14]:
etrack.df

Unnamed: 0,col1,col2,col3
0,0,0,hahahha
1,1,2,hahahha
2,2,4,hahahha
3,3,6,hahahha
4,4,8,hahahha
5,5,10,hahahha
6,6,12,hahahha
7,7,14,hahahha
8,8,16,hahahha
9,9,18,hahahha


In [15]:
!rm {etrack.log_path}

rm: /Users/salvor/.torchember/log/testmodel: is a directory


### Save/ Get JSON data

Save dictionary to json data

In [16]:
etrack["about_this_model"]= {
    "property1":1,
    "property2":False,
    "property3":
        {"size":100,
         "speed":200}
    }

Read this meta data from json file

In [17]:
etrack["about_this_model"]["property2"]

False

## Torch Ember Core

The essence of torch ember is to place trackers within modules.

It decorates the ```forward``` function of each module in order to capture the following:

* What variables come in/out of the module
* The happening sequence, containing relationships between sub-modules
* The statistics we want for further analysis, eg.
    * Min, Max, Mean, Std, of input / output tensors
    * Min, Max, Mean, Std, of model weights at this iteration
    * Min, Max, Mean, Std, of model weights grad at this iteration

In [18]:
# export
from types import MethodType
from datetime import datetime

class moduleTrack(object):
    """
    Book-keeping record attached to one tracked module.

    Attributes set here:
        module       the tracked module (it gets a back-reference ``module_tracker``)
        base_module  True when ``module.modules()`` yields exactly one entry
        root_module  flag passed in by the caller
        name         the given name, or the module's class name
        id           ``id(module)``
        children     list to be filled with the moduleTrack of sub-modules

    ``input_dt`` (dict) and ``output_dt`` (list) are assigned from outside;
    once present they are included in ``repr``.
    """
    def __init__(self,module, name=None, root_module = False):
        self.module = module
        # back-reference so the tracker can be reached from the module itself
        module.module_tracker = self

        self.base_module = len(list(module.modules()))==1
        self.root_module = root_module

        self.name = name if name else module.__class__.__name__
        self.id = id(module)
        self.children = []

    @staticmethod
    def _shape_str(value):
        """Shape of ``value`` as a list string, or ``<TypeName>`` when it has no shape."""
        shape = getattr(value, "shape", None)
        if shape is None:
            return f"<{type(value).__name__}>"
        return str(list(shape))

    def __repr__(self):
        rt = f"<{self.name} @ {hex(self.id)}>"
        if hasattr(self,"input_dt"):
            inputs = ",".join(f"{k} {self._shape_str(v)}" for k,v in self.input_dt.items())
            rt+=f'\n\t[Inputs]{inputs}'
        if hasattr(self,"output_dt"):
            outputs = ",".join(self._shape_str(v) for v in self.output_dt)
            rt+=f'\n\t[Outputs]{outputs}'
        return rt

def get_stats(tensor):
    """
    The default statistic method, it will capture
    shape of the tensor
    mean, std, max, min of the tensor
    the count and the fraction of entries within 1e-10 of zero
    this will return a dictionary

    Integer and bool tensors are promoted to float before the statistics
    are computed, because mean / std are not defined for those dtypes.
    """
    values = tensor if tensor.is_floating_point() else tensor.float()
    # the zero mask is computed once and reused for both the count and the fraction
    cnt_zero = ((values>-1e-10) & (values < 1e-10)).sum().item()
    return {"shape":list(tensor.shape),
            "mean":values.mean().item(),
            "std":values.std().item(),
            "max":values.max().item(),
            "min":values.min().item(),
            "cnt_zero": cnt_zero,
            "zero_pct": float(cnt_zero)/tensor.numel()}


    
class torchEmber(object):
    """
    Place trackers on a model and all of its sub-modules.

    The ``forward`` of every module is replaced by a wrapper that records
    statistics of the input tensors, the output tensors and, for modules
    without sub-modules, the parameters and their gradients.  Records are
    written through an ``emberTracker``; the statistic functions can be
    replaced with ``set_metric``.
    """
    def __init__(self, model, verbose = True):
        """
        model: the module to track
        verbose: print progress messages while arming / disarming
        """
        color.green|"start analyzing model"
        self.modules = dict()
        self.verbose = verbose
        self.model = model

        # a model that was armed before carries a `disarm` method (see module_register)
        if hasattr(model,"disarm"):
            model.disarm()

        self.model_name = self.model.__class__.__name__

        fname = f"{self.model_name}_{self.ts_str}"
        self.fname = fname

        self.t = emberTracker(fname)
        self.current_mt = None
        self.mt_log = []
        self.record_extra = False

        self.arm()

        self.legit_ttypes = ["in","out","weight"]
        for ttype in self.legit_ttypes: self.set_metric(ttype)(get_stats)

        if self.verbose:
            color.green|f"[INFO][{self.ts_str}]Creating meta data"
        # USER is not defined in every environment; fall back instead of raising KeyError
        user = os.environ.get("USER") or os.environ.get("USERNAME") or "unknown"
        self.t[f"base_{fname}"]={"start":self.t.ts,
                                 "user":user}
        self.t[f"vis_{fname}"] = {"vis_type":"standard"}
        self.t[f"structure_{fname}"] = self.mod_tree()

    def mark(self,**kwargs):
        """Forward the marks to the tracker, which switches the log file accordingly."""
        self.t.mark(**kwargs)

    def parse_module(self,model, name, root_module = False):
        """
        Create a moduleTrack for ``model``, replace its forward with the tracking
        wrapper and recurse into its named children.  Returns the moduleTrack.
        """
        name = f"{name}({model.__class__.__name__})"
        mt = moduleTrack(model, name, root_module)
        self.modules[name]= mt
        model.forward = self.module_register(name,model)

        for cname,children in model.named_children():
            children_mt = self.parse_module(children,f"{name}.{cname}" )
            children_mt.parent = mt
            mt.children.append(children_mt)
        return mt

    def mod_tree(self):
        """
        Return the tree of module
        """
        return self.mod_tree_parse(self.model.module_tracker)

    def mod_tree_parse(self,mt):
        """Nested dict {"name", "short", optional "children"} for ``mt`` and its children."""
        rt = {"name":mt.name, "short":mt.name.split(".")[-1]}
        if len(mt.children)>0:
            rt.update({"children":list(self.mod_tree_parse(i) for i in mt.children)})
        return rt

    @property
    def ts_str(self):
        """Current time formatted for file names: ``YYYYmmdd_HHMMSS``."""
        return datetime.now().strftime("%Y%m%d_%H%M%S")

    @property
    def ts(self):
        """Current time formatted as ``YYYY-mm-dd HH:MM:SS``."""
        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    def arm(self):
        """
        arming the tracing function to self.model
        """
        if self.verbose:
            color.yellow|f"[ARMING][START]{self.ts}"
        self.parse_module(self.model,"model", root_module = True)
        if self.verbose:
            color.yellow|f"[ARMING][SUCCESS]{self.ts}"

    def disarm(self):
        """remove the tracing function"""
        for m in self.modules.values():
            if self.verbose:
                color.blue|f"[DISARM][{m.name}]{self.ts}"
            self.recover(m)
        color.blue|f"[DISARM][DONE]{self.ts}"

    def recover(self, m):
        """Restore the forward that was in place before moduleTrack ``m`` was armed."""
        if hasattr(m.module.forward,"former"):
            m.module.forward = m.module.forward.former

    def rearm(self):
        """Disarm, then arm again."""
        self.disarm()
        self.arm()

    def reg_check(self,m):
        """
        register check: False when the forward of ``m`` is already armed, True otherwise
        """
        return not getattr(m.forward,"armed",False)

    def set_metric(self, ttype):
        """
        Decorator factory replacing the statistic function for one tensor type.
        ttype: one of self.legit_ttypes ("in", "out", "weight"); the "weight"
            function is used for both parameters and their gradients.
        The decorated function takes a tensor and returns a dictionary.
        """
        assert ttype in self.legit_ttypes, f"ttype has to be one of {str(self.legit_ttypes)}"
        def deco(f):
            setattr(self,f"record_{ttype}_core",self.record_core(f))
            return f
        return deco

    def add_record(f):
        # class-body helper used as a decorator below: the decorated method becomes a
        # factory that binds (self, f_name) and returns a callable taking (tensor, extra_data)
        def _inner(self, f_name): return partial(f, self, f_name)
        return _inner

    @add_record
    def record_core(self, f_name, tensor, extra_data):
        """
        Apply the statistic function ``f_name`` to ``tensor``, merge ``extra_data``
        into the result, log it and return it.
        extra_data: dict
        """
        dict_= f_name(tensor)
        dict_.update(extra_data)
        self.t(dict_)
        return dict_

    def record_input(self,mt):
        """
        Record the input tensors of the moduleTrack
        """
        for k,tensor in mt.input_dt.items():
            # arguments such as None or flags are not tensors and carry no statistics
            if not torch.is_tensor(tensor): continue
            extra_data= {"module":mt.name,"ts":self.t.ts,"ttype":"input","tname":k}
            if self.record_extra: self.add_extra_info(extra_data)
            self.record_in_core(tensor, extra_data)

    def record_output(self,mt):
        """
        Record the output tensors of the moduleTrack
        """
        for i,tensor in enumerate(mt.output_dt):
            # keep the position in the name even when non-tensor entries are skipped
            if not torch.is_tensor(tensor): continue
            extra_data = {"module":mt.name,"ts":self.t.ts,"ttype":"output","tname":f"output_{i}"}
            if self.record_extra:self.add_extra_info(extra_data)
            self.record_out_core(tensor,extra_data)

    def record_weight(self,mt):
        """
        Record the weights of the moduleTrack
        Only modules without sub-modules are recorded; a gradient is recorded
        when the parameter requires grad and a grad tensor is present.
        """
        if mt.base_module:
            for i,p in enumerate(mt.module.parameters()):
                extra_data={"module":mt.name,"ts":self.t.ts,
                                            "ttype":"weight","tname":f"weight_{i}"}
                if self.record_extra: self.add_extra_info(extra_data)
                self.record_weight_core(p.data, extra_data)
                if p.requires_grad and (p.grad is not None):
                    extra_data={"module":mt.name,"ts":self.t.ts,
                                            "ttype":"weight_grad","tname":f"grad_{i}"}
                    if self.record_extra: self.add_extra_info(extra_data)
                    self.record_weight_core(p.grad, extra_data)

    def add_extra(self, **kwargs):
        """
        Record the epoch # and batch #, in order to track the change of parameters over training process.
        After the model is armed, when users put model in training loop, have option to set it up.
        Each call replaces the previously set extra information.
        """
        self.record_extra = True
        self.extra_info = {f'{key}': value for key, value in kwargs.items()}

    def add_extra_info(self,extra_data):
        """Merge the extra information set by add_extra into ``extra_data`` in place."""
        extra_data.update(self.extra_info)

    def after_train(self):
        """
        reset record batch after training
        """
        if self.record_extra:
            self.record_extra=False
            self.extra_info = None

    def module_register(self,name,m):
        """
        Build the tracking forward for module ``m`` registered under ``name``.

        Returns the existing forward untouched when it is already armed.
        The wrapper carries ``armed = True`` and ``former`` (the wrapped forward),
        and the module gets a ``disarm`` method that disarms the whole model.
        """
        if self.reg_check(m) == False: return m.forward
        f = m.forward
        mt = self.modules[name]
        vs = f.__code__.co_varnames
        # drop the first variable name (self) of the wrapped forward
        mt.vars = vs[1:]
        if self.verbose:
            color.cyan | f"[BUILD FORWARD][{name}]{self.ts}"
        def new_forward(*args,**kwargs):
            # positional arguments are named after the variables of the wrapped forward
            mt.input_dt = dict(zip(mt.vars[:len(args)],args))
            mt.input_dt.update(kwargs)

            self.record_input(mt)
            self.current_mt = mt
            # the call sequence log starts over with every call of the root module
            if mt.root_module: self.mt_log=[]
            self.mt_log.append(f"enter {mt.name}")

            # ------execution of the function------
            outputs = f(*args,**kwargs)
            self.record_weight(mt)
            # ------execution of the function------

            self.mt_log.append(f"exit {mt.name}")

            # a list / tuple of outputs is recorded element by element
            if isinstance(outputs,(list,tuple)):
                mt.output_dt = list(outputs)
            else:
                mt.output_dt = [outputs,]
            self.record_output(mt)

            if mt.root_module:
                self.t.refresh() # start a new "latest" file

            return outputs

        setattr(new_forward,"armed",True)
        setattr(new_forward,"former",f)

        def disarm(this):
            """
            Remove the trackers placed by torchember
            run model.disarm()
            """
            self.disarm()
            return this
        setattr(mt.module, "disarm",MethodType(disarm,mt.module))
        return new_forward

### Tracking a model !!

Start tracking a model

In [19]:
te = torchEmber(model)

[92mstart analyzing model[0m
[93m[ARMING][START]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet)]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet).features(Sequential)]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet).features(Sequential).0(Conv2d)]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet).features(Sequential).1(ReLU)]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet).features(Sequential).2(MaxPool2d)]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet).features(Sequential).3(Conv2d)]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet).features(Sequential).4(ReLU)]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet).features(Sequential).5(MaxPool2d)]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet).features(Sequential).6(Conv2d)]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet).features(Sequential).7(ReLU)]2020-03-03 23:50:54[0m
[96m[BUILD FORWARD][model(AlexNet).features(Sequential)

Remove the trackers we placed

In [20]:
model = model.disarm()

[94m[DISARM][model(AlexNet)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential).0(Conv2d)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential).1(ReLU)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential).2(MaxPool2d)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential).3(Conv2d)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential).4(ReLU)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential).5(MaxPool2d)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential).6(Conv2d)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential).7(ReLU)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential).8(Conv2d)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).features(Sequential).9(ReLU)]2020-03-03 23:50:54[0m
[94m[DISARM][model(AlexNet).featur

Or like this

In [21]:
te.disarm()

[94m[DISARM][model(AlexNet)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential).0(Conv2d)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential).1(ReLU)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential).2(MaxPool2d)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential).3(Conv2d)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential).4(ReLU)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential).5(MaxPool2d)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential).6(Conv2d)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential).7(ReLU)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential).8(Conv2d)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).features(Sequential).9(ReLU)]2020-03-03 23:50:55[0m
[94m[DISARM][model(AlexNet).featur

Okay, refresh the tracker

In [22]:
te.rearm()

[94m[DISARM][model(AlexNet)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential).0(Conv2d)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential).1(ReLU)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential).2(MaxPool2d)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential).3(Conv2d)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential).4(ReLU)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential).5(MaxPool2d)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential).6(Conv2d)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential).7(ReLU)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential).8(Conv2d)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).features(Sequential).9(ReLU)]2020-03-03 23:50:56[0m
[94m[DISARM][model(AlexNet).featur

Run forward passes for 2 mock epochs each of training (3 batches per epoch) and validation (2 batches per epoch); nothing strange happened

In [24]:
# mock schedule: (phase name, batches per epoch); each phase runs for 2 epochs
for phase, n_batches in (("train", 3), ("valid", 2)):
    te.mark(phase=phase)
    for epoch in range(2):
        te.mark(epoch=epoch)
        for batch in range(n_batches):
            # tag every record of this forward pass with its batch number
            te.add_extra(n_batch=batch)
            model(samp)
# stop attaching the extra batch information
te.after_train()

In [27]:
!ls -l ~/.torchember/log/AlexNet_20200303_235054

total 400
-rw-r--r--  1 salvor  staff  59804 Mar  3 23:51 init-00_phase-train_epoch-0.log
-rw-r--r--  1 salvor  staff  59832 Mar  3 23:51 init-00_phase-train_epoch-1.log
-rw-r--r--  1 salvor  staff  39868 Mar  3 23:51 init-00_phase-valid_epoch-0.log
-rw-r--r--  1 salvor  staff  39859 Mar  3 23:51 init-00_phase-valid_epoch-1.log


### Check snowballing tensor stats

In [28]:
te.t.df

Unnamed: 0,shape,mean,std,max,min,cnt_zero,zero_pct,module,ts,ttype,tname,n_batch
0,"[2, 3, 224, 224]",-1.000057,0.577234,-0.000005,-1.999995,0,0.0,model(AlexNet),2020-03-03 23:51:12,input,x,0
1,"[2, 3, 224, 224]",-1.000057,0.577234,-0.000005,-1.999995,0,0.0,model(AlexNet).features(Sequential),2020-03-03 23:51:12,input,input,0
2,"[2, 3, 224, 224]",-1.000057,0.577234,-0.000005,-1.999995,0,0.0,model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:51:12,input,input,0
3,"[64, 3, 11, 11]",-0.000027,0.030262,0.052485,-0.052486,0,0.0,model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:51:12,weight,weight_0,0
4,[64],-0.001849,0.029150,0.052460,-0.048437,0,0.0,model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:51:12,weight,weight_1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
123,"[1000, 4096]",-0.000004,0.009020,0.015625,-0.015625,0,0.0,model(AlexNet).classifier(Sequential).6(Linear),2020-03-03 23:51:14,weight,weight_0,1
124,[1000],-0.000127,0.008769,0.015615,-0.015500,0,0.0,model(AlexNet).classifier(Sequential).6(Linear),2020-03-03 23:51:14,weight,weight_1,1
125,"[2, 1000]",-0.000347,0.011486,0.033753,-0.034803,0,0.0,model(AlexNet).classifier(Sequential).6(Linear),2020-03-03 23:51:14,output,output_0,1
126,"[2, 1000]",-0.000347,0.011486,0.033753,-0.034803,0,0.0,model(AlexNet).classifier(Sequential),2020-03-03 23:51:14,output,output_0,1


Now let's start recording weight gradient data. Once backward() has been called, the gradients are recorded when the next forward pass runs

### Track weight gradients

In [29]:
for i in range(3):
    model(samp).mean().backward()

As you can see here, for conv layer, 
* grad_0 is for the 1st weight grad tensor(weight), 
* grad_1 is for the 2nd(bias)

In [30]:
te.t.df[te.t.df["ttype"]=="weight_grad"].head()

Unnamed: 0,shape,mean,std,max,min,cnt_zero,zero_pct,module,ts,ttype,tname,n_batch
196,"[64, 3, 11, 11]",5.763019e-06,2e-05,8.7e-05,-7e-05,67,0.002884,model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:51:49,weight_grad,grad_0,
198,[64],-5.81631e-06,1.8e-05,4.1e-05,-5.9e-05,0,0.0,model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:51:49,weight_grad,grad_1,
206,"[192, 64, 5, 5]",-1.541117e-06,1.6e-05,0.000106,-0.000138,38819,0.126364,model(AlexNet).features(Sequential).3(Conv2d),2020-03-03 23:51:49,weight_grad,grad_0,
208,[192],-2.695832e-06,2.2e-05,6.2e-05,-8e-05,6,0.03125,model(AlexNet).features(Sequential).3(Conv2d),2020-03-03 23:51:49,weight_grad,grad_1,
216,"[384, 192, 3, 3]",-5.951989e-07,1.2e-05,0.000155,-0.000147,184824,0.278537,model(AlexNet).features(Sequential).6(Conv2d),2020-03-03 23:51:49,weight_grad,grad_0,


### Module tree json
This file will be stored at ```$HOME/.torchember/data/structure_<modelname>_<date>_<time>.json```

In [31]:
te.mod_tree()

{'name': 'model(AlexNet)',
 'short': 'model(AlexNet)',
 'children': [{'name': 'model(AlexNet).features(Sequential)',
   'short': 'features(Sequential)',
   'children': [{'name': 'model(AlexNet).features(Sequential).0(Conv2d)',
     'short': '0(Conv2d)'},
    {'name': 'model(AlexNet).features(Sequential).1(ReLU)',
     'short': '1(ReLU)'},
    {'name': 'model(AlexNet).features(Sequential).2(MaxPool2d)',
     'short': '2(MaxPool2d)'},
    {'name': 'model(AlexNet).features(Sequential).3(Conv2d)',
     'short': '3(Conv2d)'},
    {'name': 'model(AlexNet).features(Sequential).4(ReLU)',
     'short': '4(ReLU)'},
    {'name': 'model(AlexNet).features(Sequential).5(MaxPool2d)',
     'short': '5(MaxPool2d)'},
    {'name': 'model(AlexNet).features(Sequential).6(Conv2d)',
     'short': '6(Conv2d)'},
    {'name': 'model(AlexNet).features(Sequential).7(ReLU)',
     'short': '7(ReLU)'},
    {'name': 'model(AlexNet).features(Sequential).8(Conv2d)',
     'short': '8(Conv2d)'},
    {'name': 'model(AlexN

In [32]:
te.mt_log

['enter model(AlexNet)',
 'enter model(AlexNet).features(Sequential)',
 'enter model(AlexNet).features(Sequential).0(Conv2d)',
 'exit model(AlexNet).features(Sequential).0(Conv2d)',
 'enter model(AlexNet).features(Sequential).1(ReLU)',
 'exit model(AlexNet).features(Sequential).1(ReLU)',
 'enter model(AlexNet).features(Sequential).2(MaxPool2d)',
 'exit model(AlexNet).features(Sequential).2(MaxPool2d)',
 'enter model(AlexNet).features(Sequential).3(Conv2d)',
 'exit model(AlexNet).features(Sequential).3(Conv2d)',
 'enter model(AlexNet).features(Sequential).4(ReLU)',
 'exit model(AlexNet).features(Sequential).4(ReLU)',
 'enter model(AlexNet).features(Sequential).5(MaxPool2d)',
 'exit model(AlexNet).features(Sequential).5(MaxPool2d)',
 'enter model(AlexNet).features(Sequential).6(Conv2d)',
 'exit model(AlexNet).features(Sequential).6(Conv2d)',
 'enter model(AlexNet).features(Sequential).7(ReLU)',
 'exit model(AlexNet).features(Sequential).7(ReLU)',
 'enter model(AlexNet).features(Sequentia

### Check latest tensor stats

In [33]:
te.t.latest_df

Unnamed: 0,shape,mean,std,max,min,cnt_zero,zero_pct,module,ts,ttype,tname
0,"[2, 3, 224, 224]",-1.000057,0.577234,-0.000005,-1.999995,0,0.000000,model(AlexNet),2020-03-03 23:51:51,input,x
1,"[2, 3, 224, 224]",-1.000057,0.577234,-0.000005,-1.999995,0,0.000000,model(AlexNet).features(Sequential),2020-03-03 23:51:51,input,input
2,"[2, 3, 224, 224]",-1.000057,0.577234,-0.000005,-1.999995,0,0.000000,model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:51:51,input,input
3,"[64, 3, 11, 11]",-0.000027,0.030262,0.052485,-0.052486,0,0.000000,model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:51:51,weight,weight_0
4,"[64, 3, 11, 11]",0.000007,0.000028,0.000130,-0.000096,67,0.002884,model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:51:51,weight_grad,grad_0
...,...,...,...,...,...,...,...,...,...,...,...
75,[1000],-0.000127,0.008769,0.015615,-0.015500,0,0.000000,model(AlexNet).classifier(Sequential).6(Linear),2020-03-03 23:51:53,weight,weight_1
76,[1000],0.002000,0.000000,0.002000,0.002000,0,0.000000,model(AlexNet).classifier(Sequential).6(Linear),2020-03-03 23:51:53,weight_grad,grad_1
77,"[2, 1000]",-0.000499,0.011397,0.033915,-0.030265,0,0.000000,model(AlexNet).classifier(Sequential).6(Linear),2020-03-03 23:51:53,output,output_0
78,"[2, 1000]",-0.000499,0.011397,0.033915,-0.030265,0,0.000000,model(AlexNet).classifier(Sequential),2020-03-03 23:51:53,output,output_0


### Redefine what you want to record

With the default statistic function, you can keep track of the shape, mean, std, max and min of a tensor.

The afore-mentioned tensor could mean all of the following
* module input tensors
* module output tensors
* module weight
* gradient of module weight

If you have more interesting metrics to follow, you can redefine the statistic tracking function

#### Redefine the weight tensor / weight grad tensor statistic function

In [34]:
@te.set_metric("weight")
def weight_stats(tensor):
    """Element count of the tensor plus the maximum of every slice along its first axis."""
    total = tensor.numel()
    per_row_max = [row.max().item() for row in tensor]
    return {"num": total, "row_max": per_row_max}

#### Redefine the input or output statistic function

In [35]:
@te.set_metric("in")
def input_stats(tensor):
    """Element count of the tensor plus the minimum of every slice along its first axis."""
    total = tensor.numel()
    per_row_min = [row.min().item() for row in tensor]
    return {"num": total, "row_min": per_row_min}

@te.set_metric("out")
def output_stats(tensor):
    """Element count of the tensor plus the minimum of every slice along its first axis."""
    total = tensor.numel()
    per_row_min = [row.min().item() for row in tensor]
    return {"num": total, "row_min": per_row_min}

Let's run one more forward pass

In [36]:
model(samp)

tensor([[ 0.0178, -0.0009,  0.0027,  ..., -0.0085, -0.0016, -0.0087],
        [ 0.0171, -0.0024, -0.0035,  ...,  0.0063,  0.0160, -0.0035]],
       grad_fn=<AddmmBackward>)

The latest stats changed

In [37]:
te.t.latest_df

Unnamed: 0,num,row_min,module,ts,ttype,tname,row_max
0,301056,"[-1.9999693632125854, -1.9999945163726807]",model(AlexNet),2020-03-03 23:52:12,input,x,
1,301056,"[-1.9999693632125854, -1.9999945163726807]",model(AlexNet).features(Sequential),2020-03-03 23:52:12,input,input,
2,301056,"[-1.9999693632125854, -1.9999945163726807]",model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:52:12,input,input,
3,23232,,model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:52:12,weight,weight_0,"[0.05223393440246582, 0.052147045731544495, 0...."
4,23232,,model(AlexNet).features(Sequential).0(Conv2d),2020-03-03 23:52:12,weight_grad,grad_0,"[1.9010796677321196e-05, 7.584694685647264e-05..."
...,...,...,...,...,...,...,...
75,1000,,model(AlexNet).classifier(Sequential).6(Linear),2020-03-03 23:52:12,weight,weight_1,"[0.008336527273058891, 0.00010786205530166626,..."
76,1000,,model(AlexNet).classifier(Sequential).6(Linear),2020-03-03 23:52:12,weight_grad,grad_1,"[0.003000000026077032, 0.003000000026077032, 0..."
77,2000,"[-0.03156087547540665, -0.03723128139972687]",model(AlexNet).classifier(Sequential).6(Linear),2020-03-03 23:52:12,output,output_0,
78,2000,"[-0.03156087547540665, -0.03723128139972687]",model(AlexNet).classifier(Sequential),2020-03-03 23:52:12,output,output_0,


## Placing tracker on variables
To be experimented here

In [38]:
w = list(model.features.parameters())[0]

In [39]:
from types import BuiltinMethodType,BuiltinFunctionType

In [40]:
x1 = torch.rand(5,6)
x2 = torch.rand(5,6)
x3 = x1*6+x2

In [41]:
x2.numel()

30

In [42]:
x1.abs_()

tensor([[0.5395, 0.1158, 0.2436, 0.6968, 0.5318, 0.5479],
        [0.5569, 0.2847, 0.3291, 0.6477, 0.0778, 0.0844],
        [0.0548, 0.5794, 0.4905, 0.6806, 0.6258, 0.2134],
        [0.7322, 0.0742, 0.1259, 0.5159, 0.7725, 0.7967],
        [0.8471, 0.6944, 0.3707, 0.2552, 0.6063, 0.2456]])

In [43]:
from types import MethodType

In [44]:
import inspect

In [45]:
def TorchTensorEmber(x):
    """
    Experimental: wrap ``x`` in an instance of a freshly created subclass of
    ``x.__class__`` whose builtin attributes print their name when called.

    Returns ``TensorEmber(x)``; a new ``TensorEmber`` class is created on every call.
    """
    class TensorEmber(x.__class__):
        def __init__(self,x):
            # keep the wrapped object; super_attr inspects its attributes
            self.host_ = x
            attrs = dir(x)
            for attr in attrs:
                self.super_attr(attr)
            
        def super_attr(self,attr):
            # only attributes for which inspect.isbuiltin is true get a wrapper
            if inspect.isbuiltin(getattr(self.host_,attr))==False: return 
            def func(self,*args,**kwargs):
                # announce the call, then delegate to the parent class implementation
                print(attr)
                return getattr(super(),attr)(*args,**kwargs)
            func.__name__ = attr
            # the wrapper is bound on the instance, not on the class
            # NOTE(review): presumably this is why operator syntax such as x2+x1 is not traced — confirm
            setattr(self,attr, MethodType(func,self))
            return func
            
    return TensorEmber(x)

In [46]:
x2 = TorchTensorEmber(x2)

In [47]:
x2.add(x1)

add


tensor([[0.6532, 0.3970, 0.9357, 0.7104, 0.5795, 1.4977],
        [1.5565, 1.0672, 0.4137, 1.4910, 0.9587, 0.2552],
        [0.2027, 1.4541, 1.2611, 1.3104, 0.6991, 0.5013],
        [1.1817, 1.0199, 0.5722, 0.9614, 1.0298, 1.1048],
        [0.8859, 0.7230, 1.1873, 1.1150, 1.1920, 0.2781]])

In [48]:
x2+x1

tensor([[0.6532, 0.3970, 0.9357, 0.7104, 0.5795, 1.4977],
        [1.5565, 1.0672, 0.4137, 1.4910, 0.9587, 0.2552],
        [0.2027, 1.4541, 1.2611, 1.3104, 0.6991, 0.5013],
        [1.1817, 1.0199, 0.5722, 0.9614, 1.0298, 1.1048],
        [0.8859, 0.7230, 1.1873, 1.1150, 1.1920, 0.2781]])

## Placing tracker on optimizer
To be experimented here