# Torch Ember Core
> Analyzing How Model Improves

In [1]:
# default_exp core

In this tutorial we'll use AlexNet as the example model.
We can load AlexNet from ```torchvision```.

In [2]:
from torchvision.models.alexnet import AlexNet
import torch

In [3]:
model = AlexNet()

#### Sample data
Create some sample data: something like a batch of 2 normalized images, each of size 224x224

In [4]:
# torch.rand draws from [0, 1); subtracting 1 and doubling puts the values in [-2, 0)
samp = (torch.rand(2,3,224,224)-1)*2

In [5]:
model(samp).shape

torch.Size([2, 1000])

## File, log, data, directory handler

In [6]:
# export
import os
from pathlib import Path
import json
import pandas as pd
from datetime import datetime
import torch

class tracker(object):
    """
    File-backed store for one library's logs and JSON metadata.

    Everything lives under ``<home>/.<libname>``:

    * ``data/<key>.json`` -- dictionaries saved with ``tracker[key] = dict_``
    * ``log/<fname>``     -- a JSON-lines log appended to with ``tracker + dict_``

    libname: name of the library, used for the hidden directory name
    fname: file name of the log inside the ``log`` directory
    """
    def __init__(self, libname, fname):
        self.libname = libname
        self.fname = fname
        # Use $HOME when it is set (the original behaviour); fall back to
        # Path.home() on platforms where the variable is missing.
        self.home = Path(os.environ.get("HOME") or Path.home())
        self.dir = self.home/f".{libname}"
        self.dir.mkdir(exist_ok = True)
        self.data = self.dir/"data"
        self.data.mkdir(exist_ok = True)
        self.log = self.dir/"log"
        self.log.mkdir(exist_ok = True)
        self.log_path = self.log/self.fname

    def __repr__(self):
        return f"<{self.libname}:{self.fname}>"

    def mkdir(self, path):
        """Create directory ``path`` if it does not exist yet."""
        Path(path).mkdir(exist_ok=True)

    def __setitem__(self, fname, dict_):
        """
        Save ``dict_`` as ``data/<fname>.json``, overwriting any previous file.
        """
        # Serialise before opening the file, so a dict that can not be
        # serialised does not truncate data that was saved earlier.
        payload = json.dumps(dict_, indent = 2)
        with open(self.data/f"{fname}.json", "w") as f:
            f.write(payload)

    def __getitem__(self, fname):
        """
        Load and return the content of ``data/<fname>.json``.
        """
        with open(self.data/f"{fname}.json", "r") as f:
            return json.loads(f.read())

    def logging(self, line):
        """
        Append ``line`` plus a newline to the log file, return the log path.
        """
        with open(self.log_path, "a") as f:
            f.write(line+"\n")
        return self.log_path

    def __add__(self, dict_):
        """
        add a dictionary to log
        """
        self.logging(json.dumps(dict_))
        return self

    def lines(self):
        """
        Return every record of the log file as a list of parsed JSON objects.
        """
        # Blank lines are skipped, so a record is kept even when the file
        # does not end with a newline.
        with open(self.log_path) as f:
            return [json.loads(line) for line in f if line.strip()]

    @property
    def ts(self):
        """Current local time formatted as ``YYYY-mm-dd HH:MM:SS``."""
        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    @property
    def df(self):
        """The whole log as a pandas DataFrame, one row per logged record."""
        return pd.DataFrame(self.lines())
    
class emberTracker(tracker):
    """
    Tracker for the "torchember" library.

    On top of the full log it keeps a second file, ``log/<fname>_latest``.
    Lines passed to ``logging`` are buffered in ``latest_lines`` and written
    to that file (replacing its content) each time ``refresh`` is called.
    """
    def __init__(self, fname):
        super().__init__("torchember",fname)
        self.latest = self.log/f"{fname}_latest"
        self.latest_lines = ""

    def logging(self, line):
        """
        Append ``line`` to the full log and to the buffer of latest lines.
        Returns the path of the full log.
        """
        log_path = super().logging(line)
        self.latest_lines += line+"\n"
        return log_path

    def refresh(self):
        """
        Overwrite the latest file with the lines buffered since the previous
        refresh, then empty the buffer. Returns the path of the latest file.
        """
        with open(self.latest, "w") as f:
            f.write(self.latest_lines)
        self.latest_lines = ""
        return self.latest

    def latest_line_list(self):
        """
        Return the records of the latest file as a list of parsed JSON objects.
        """
        with open(self.latest) as f:
            return [json.loads(line) for line in f if line.strip()]

    @property
    def latest_df(self):
        """The latest file as a pandas DataFrame, one row per record."""
        return pd.DataFrame(self.latest_line_list())
        

### Test on the data tracker

In [7]:
etrack = emberTracker("testmodel")

This is how we record data from within other code

In [8]:
# `tracker + dict` appends the dict to the log file as one JSON line (tracker.__add__)
for i in range(10):
    etrack+{"col1":i,"col2":i*2,"col3":"hahahha"}

Preview log file

In [9]:
!cat {etrack.log_path}

{"col1": 0, "col2": 0, "col3": "hahahha"}
{"col1": 1, "col2": 2, "col3": "hahahha"}
{"col1": 2, "col2": 4, "col3": "hahahha"}
{"col1": 3, "col2": 6, "col3": "hahahha"}
{"col1": 4, "col2": 8, "col3": "hahahha"}
{"col1": 5, "col2": 10, "col3": "hahahha"}
{"col1": 6, "col2": 12, "col3": "hahahha"}
{"col1": 7, "col2": 14, "col3": "hahahha"}
{"col1": 8, "col2": 16, "col3": "hahahha"}
{"col1": 9, "col2": 18, "col3": "hahahha"}


Read log as dataframe

In [10]:
etrack.df

Unnamed: 0,col1,col2,col3
0,0,0,hahahha
1,1,2,hahahha
2,2,4,hahahha
3,3,6,hahahha
4,4,8,hahahha
5,5,10,hahahha
6,6,12,hahahha
7,7,14,hahahha
8,8,16,hahahha
9,9,18,hahahha


In [11]:
!rm {etrack.log_path}

### Save/ Get JSON data

Save dictionary to json data

In [12]:
# Item assignment writes the dict to about_this_model.json in the tracker's data directory
etrack["about_this_model"]= {
    "property1":1,
    "property2":False,
    "property3":
        {"size":100,
         "speed":200}
    }

Read this meta data from json file

In [13]:
etrack["about_this_model"]["property2"]

False

## Torch Ember Core

The essence of torch ember is to place trackers within modules.

It decorates the ```forward``` function to capture the following:

* What variables come in/out of the module
* The order in which modules are executed, including the containment relationships between sub-modules
* The statistics we want for further analysis, eg.
    * Min, Max, Mean, Std, of input / output tensors
    * Min, Max, Mean, Std, of model weights at this iteration
    * Min, Max, Mean, Std, of model weights grad at this iteration

In [14]:
# export
from types import MethodType
from datetime import datetime

class moduleTrack(object):
    """
    Bookkeeping record attached to a single module.

    module: the module being tracked; it receives this object as
        its ``module_tracker`` attribute
    name: display name, the class name of the module when omitted
    root_module: True for the top level module of the tracked model
    """
    def __init__(self,module, name=None, root_module = False):
        self.module = module
        module.module_tracker = self
        # module.modules() yields the module itself plus all descendants,
        # a single entry therefore means the module has no sub-modules
        self.base_module = len(list(module.modules())) == 1
        self.root_module = root_module

        if name:
            self.name = name
        else:
            self.name = module.__class__.__name__
        self.id = id(module)
        self.children = []

    def __repr__(self):
        pieces = [f"<{self.name} @ {hex(self.id)}>"]
        if hasattr(self,"input_dt"):
            in_desc = [key+" "+str(list(val.shape)) for key,val in self.input_dt.items()]
            pieces.append("\n\t[Inputs]"+",".join(in_desc))
        if hasattr(self,"output_dt"):
            out_desc = [str(list(val.shape)) for val in self.output_dt]
            pieces.append("\n\t[Outputs]"+",".join(out_desc))
        return "".join(pieces)

def get_stats(tensor):
    """
    The default statistic method, it will capture
    shape of the tensor
    mean, std, max, min of the tensor
    this will return a dictionary

    Integer and bool tensors are converted to float first, because
    mean() and std() are not defined for those dtypes.
    """
    shape = list(tensor.shape)
    # the statistics are plain numbers, no autograd graph is needed for them
    tensor = tensor.detach()
    if not (tensor.is_floating_point() or tensor.is_complex()):
        tensor = tensor.float()
    return {"shape":shape,
            "mean":tensor.mean().item(),
            "std":tensor.std().item(),
            "max":tensor.max().item(),
            "min":tensor.min().item()}

class torchEmber(object):
    """
    Place trackers inside every module of a model.

    The ``forward`` of the model and of each of its sub-modules is replaced
    by a wrapper which records statistics of the inputs, the outputs, the
    weights and the weight gradients through an ``emberTracker`` (``self.t``).

    model: the module to track. It gets a ``disarm`` method which removes
        the wrappers again.
    """
    def __init__(self, model):
        self.modules = dict()
        self.model = model
        # A model which was armed before carries a `disarm` method, remove
        # the old wrappers first so that forward is not wrapped twice.
        if hasattr(model,"disarm"):
            model.disarm()

        self.model_name = self.model.__class__.__name__

        fname = f"{self.model_name}_{self.ts_str}"
        self.fname = fname

        self.t = emberTracker(fname)
        self.current_mt = None
        self.mt_log = []

        self.arm()

        self.how_record_in(get_stats)
        self.how_record_out(get_stats)
        self.how_record_weight(get_stats)
        # $USER is not defined on every platform, do not fail because of it
        user = os.environ.get("USER") or os.environ.get("USERNAME") or "unknown"
        self.t[f"base_{fname}"]={"start":self.t.ts,
                                 "user":user}
        self.t[f"vis_{fname}"] = {"vis_type":"standard"}
        self.t[f"structure_{fname}"] = self.mod_tree()

    def parse_module(self,model, name, root_module = False):
        """
        Create the moduleTrack of ``model``, wrap its forward function,
        then do the same recursively for all its children.
        The tracker name has the format ``<name>(<class name>)``.
        Returns the moduleTrack of ``model``.
        """
        name = f"{name}({model.__class__.__name__})"
        mt = moduleTrack(model, name, root_module)
        self.modules[name]= mt
        model.forward = self.module_register(name,model)

        for cname,children in model.named_children():
            children_mt = self.parse_module(children,f"{name}.{cname}" )
            children_mt.parent = mt
            mt.children.append(children_mt)
        return mt

    def mod_tree(self):
        """
        Return the tree of module
        """
        return self.mod_tree_parse(self.model.module_tracker)

    def mod_tree_parse(self,mt):
        """
        Return moduleTrack ``mt`` as a nested dictionary with the keys
        "name", "short" (last dot separated part of the name) and,
        if there are any, "children".
        """
        rt = {"name":mt.name, "short":mt.name.split(".")[-1]}
        if len(mt.children)>0:
            rt["children"] = [self.mod_tree_parse(child) for child in mt.children]
        return rt

    @property
    def ts_str(self):
        """Current local time formatted as ``YYYYmmdd_HHMMSS``."""
        return datetime.now().strftime("%Y%m%d_%H%M%S")

    def arm(self):
        """
        arming the tracing function to self.model
        """
        self.parse_module(self.model,"model", root_module = True)

    def disarm(self):
        """remove the tracing function"""
        for m in self.modules.values():
            self.recover(m)

    def recover(self, m):
        """
        Put back the forward function which was stored as ``former``
        on the wrapper of moduleTrack ``m``.
        """
        if hasattr(m.module.forward,"former"):
            m.module.forward = m.module.forward.former

    def rearm(self):
        """Remove all the wrappers, then place them again."""
        self.disarm()
        self.arm()

    def reg_check(self,m):
        """
        register check
        Returns False if the forward of ``m`` is already wrapped, else True
        """
        return not getattr(m.forward,"armed",False)

    def _install_recorder(self, attr, f):
        """
        Bind a recording method named ``attr`` to this object.

        The method calls ``f(tensor)``, which has to return a dictionary,
        adds ``extra_data`` to that dictionary, logs it and returns it.
        """
        def record_core(this, tensor,extra_data):
            """
            extra_data: dict
            """
            dict_ = f(tensor)
            dict_.update(extra_data)
            this.t+dict_
            return dict_
        setattr(self,attr,MethodType(record_core,self))
        return getattr(self,attr)

    def how_record_in(self,f):
        """
        Set ``f`` as the statistic function of input tensors,
        can be used as a decorator.
        """
        return self._install_recorder("record_input_core",f)

    def how_record_out(self,f):
        """
        Set ``f`` as the statistic function of output tensors,
        can be used as a decorator.
        """
        return self._install_recorder("record_output_core",f)

    def how_record_weight(self,f):
        """
        Set ``f`` as the statistic function of weight and weight grad tensors,
        can be used as a decorator.
        """
        return self._install_recorder("record_weight_core",f)

    def record_input(self,mt):
        """
        Record the input tensors of the moduleTrack
        """
        for k,tensor in mt.input_dt.items():
            # flags, None and other non tensor arguments have no statistics
            if not torch.is_tensor(tensor):
                continue
            self.record_input_core(tensor,
                          extra_data = {"module":mt.name,"ts":self.t.ts,"ttype":"input","tname":k})

    def record_output(self,mt):
        """
        Record the output tensors of the moduleTrack
        """
        for i,tensor in enumerate(mt.output_dt):
            # keep the position in the name, even if an entry is skipped
            if not torch.is_tensor(tensor):
                continue
            self.record_output_core(tensor,
                          extra_data = {"module":mt.name,"ts":self.t.ts,"ttype":"output","tname":f"output_{i}"})

    def record_weight(self,mt):
        """
        Record the weights of the moduleTrack
        Only modules without sub-modules are recorded, the gradient of
        a parameter is recorded as well once it is available.
        """
        if not mt.base_module:
            return
        for i,p in enumerate(mt.module.parameters()):
            self.record_weight_core(p.data,
                          extra_data = {"module":mt.name,"ts":self.t.ts,
                                        "ttype":"weight","tname":f"weight_{i}"})
            # identity check: comparing a tensor to None with != is not reliable
            if p.requires_grad and (p.grad is not None):
                self.record_weight_core(p.grad,
                          extra_data = {"module":mt.name,"ts":self.t.ts,
                                        "ttype":"weight_grad","tname":f"grad_{i}"})

    def module_register(self,name,m):
        """
        Build the tracking wrapper around the forward function of ``m``.

        name: key of the moduleTrack of ``m`` in ``self.modules``
        m: the module
        Returns the wrapper, or the current forward if it is wrapped already.
        The wrapper has the attributes ``armed`` (True) and ``former``
        (the forward function it wraps).
        """
        if not self.reg_check(m): return m.forward
        f = m.forward
        mt = self.modules[name]
        # variable names of forward without `self`, used to name positional inputs
        vs = f.__code__.co_varnames
        mt.vars = vs[1:]

        def new_forward(*args,**kwargs):
            mt.input_dt = dict(zip(mt.vars[:len(args)],args))
            mt.input_dt.update(kwargs)

            self.record_input(mt)
            self.current_mt = mt
            if mt.root_module: self.mt_log=[]
            self.mt_log.append(f"enter {mt.name}")

            # ------execution of the function------
            outputs = f(*args,**kwargs)
            self.record_weight(mt)
            # ------execution of the function------

            self.mt_log.append(f"exit {mt.name}")

            # several outputs are recorded one by one, a single output is wrapped in a list
            if isinstance(outputs,(list,tuple)):
                mt.output_dt = list(outputs)
            else:
                mt.output_dt = [outputs,]
            self.record_output(mt)

            if mt.root_module:
                self.t.refresh() # start a new "latest" file

            return outputs

        new_forward.armed = True
        new_forward.former = f

        def disarm(this):
            """
            Remove the trackers placed by torchember
            run model.disarm()
            """
            self.disarm()
            return this
        setattr(mt.module, "disarm",MethodType(disarm,mt.module))
        return new_forward

### Tracking a model !!

Start tracking a model

In [15]:
te = torchEmber(model)

Remove the trackers we placed

In [16]:
# disarm() was attached to the module by torchEmber and returns the module itself
model = model.disarm()

Or like this

In [17]:
te.disarm()

Okay, refresh the tracker

In [18]:
te.rearm()

Run the forward pass for 3 iterations; nothing strange happened

In [19]:
# Every call is logged; the wrapper of the root module calls te.t.refresh() at the
# end of each pass, so the "latest" file only holds the records of the last pass
for i in range(3):
    model(samp)

### Check snowballing tensor stats

In [20]:
te.t.df

Unnamed: 0,shape,mean,std,max,min,module,ts,ttype,tname
0,"[2, 3, 224, 224]",-1.000464e+00,0.577547,-0.000002,-1.999995,model(AlexNet),2020-02-15 13:44:48,input,x
1,"[2, 3, 224, 224]",-1.000464e+00,0.577547,-0.000002,-1.999995,model(AlexNet).features(Sequential),2020-02-15 13:44:48,input,input
2,"[2, 3, 224, 224]",-1.000464e+00,0.577547,-0.000002,-1.999995,model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:44:48,input,input
3,"[64, 3, 11, 11]",1.328739e-04,0.030303,0.052484,-0.052484,model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:44:48,weight,weight_0
4,[64],3.457826e-05,0.030119,0.049655,-0.052481,model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:44:48,weight,weight_1
...,...,...,...,...,...,...,...,...,...
187,"[1000, 4096]",9.627095e-07,0.009022,0.015625,-0.015625,model(AlexNet).classifier(Sequential).6(Linear),2020-02-15 13:44:49,weight,weight_0
188,[1000],-3.209895e-04,0.009133,0.015583,-0.015590,model(AlexNet).classifier(Sequential).6(Linear),2020-02-15 13:44:49,weight,weight_1
189,"[2, 1000]",-1.890486e-04,0.011266,0.029219,-0.030190,model(AlexNet).classifier(Sequential).6(Linear),2020-02-15 13:44:49,output,output_0
190,"[2, 1000]",-1.890486e-04,0.011266,0.029219,-0.030190,model(AlexNet).classifier(Sequential),2020-02-15 13:44:49,output,output_0


Now let's start recording weight gradient data. Once we call backward(), the gradients will be recorded the next time a forward pass is run

### Track weight gradients

In [21]:
# Gradients are read from p.grad during the forward pass, so the gradients of one
# backward() show up in the records of the following forward pass.
# NOTE(review): no zero_grad() is called here, so gradients presumably accumulate across iterations
for i in range(3):
    model(samp).mean().backward()

As you can see here, for conv layer, 
* grad_0 is for the 1st weight grad tensor(weight), 
* grad_1 is for the 2nd(bias)

In [22]:
te.t.df[te.t.df["ttype"]=="weight_grad"].head()

Unnamed: 0,shape,mean,std,max,min,module,ts,ttype,tname
260,"[64, 3, 11, 11]",1.592942e-06,1.6e-05,5.2e-05,-5.6e-05,model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:44:50,weight_grad,grad_0
262,[64],-1.558506e-06,1.4e-05,2.9e-05,-2.9e-05,model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:44:50,weight_grad,grad_1
270,"[192, 64, 5, 5]",-1.132841e-06,1.3e-05,0.000113,-0.000131,model(AlexNet).features(Sequential).3(Conv2d),2020-02-15 13:44:50,weight_grad,grad_0
272,[192],-2.106908e-06,1.7e-05,5.6e-05,-6.5e-05,model(AlexNet).features(Sequential).3(Conv2d),2020-02-15 13:44:50,weight_grad,grad_1
280,"[384, 192, 3, 3]",-2.286145e-07,1.1e-05,0.000155,-0.000168,model(AlexNet).features(Sequential).6(Conv2d),2020-02-15 13:44:50,weight_grad,grad_0


### Module tree json
This file will be stored at ```$HOME/.torchember/data/structure_<modelname>_<date>_<time>.json```

In [23]:
te.mod_tree()

{'name': 'model(AlexNet)',
 'short': 'model(AlexNet)',
 'children': [{'name': 'model(AlexNet).features(Sequential)',
   'short': 'features(Sequential)',
   'children': [{'name': 'model(AlexNet).features(Sequential).0(Conv2d)',
     'short': '0(Conv2d)'},
    {'name': 'model(AlexNet).features(Sequential).1(ReLU)',
     'short': '1(ReLU)'},
    {'name': 'model(AlexNet).features(Sequential).2(MaxPool2d)',
     'short': '2(MaxPool2d)'},
    {'name': 'model(AlexNet).features(Sequential).3(Conv2d)',
     'short': '3(Conv2d)'},
    {'name': 'model(AlexNet).features(Sequential).4(ReLU)',
     'short': '4(ReLU)'},
    {'name': 'model(AlexNet).features(Sequential).5(MaxPool2d)',
     'short': '5(MaxPool2d)'},
    {'name': 'model(AlexNet).features(Sequential).6(Conv2d)',
     'short': '6(Conv2d)'},
    {'name': 'model(AlexNet).features(Sequential).7(ReLU)',
     'short': '7(ReLU)'},
    {'name': 'model(AlexNet).features(Sequential).8(Conv2d)',
     'short': '8(Conv2d)'},
    {'name': 'model(AlexN

In [24]:
te.mt_log

['enter model(AlexNet)',
 'enter model(AlexNet).features(Sequential)',
 'enter model(AlexNet).features(Sequential).0(Conv2d)',
 'exit model(AlexNet).features(Sequential).0(Conv2d)',
 'enter model(AlexNet).features(Sequential).1(ReLU)',
 'exit model(AlexNet).features(Sequential).1(ReLU)',
 'enter model(AlexNet).features(Sequential).2(MaxPool2d)',
 'exit model(AlexNet).features(Sequential).2(MaxPool2d)',
 'enter model(AlexNet).features(Sequential).3(Conv2d)',
 'exit model(AlexNet).features(Sequential).3(Conv2d)',
 'enter model(AlexNet).features(Sequential).4(ReLU)',
 'exit model(AlexNet).features(Sequential).4(ReLU)',
 'enter model(AlexNet).features(Sequential).5(MaxPool2d)',
 'exit model(AlexNet).features(Sequential).5(MaxPool2d)',
 'enter model(AlexNet).features(Sequential).6(Conv2d)',
 'exit model(AlexNet).features(Sequential).6(Conv2d)',
 'enter model(AlexNet).features(Sequential).7(ReLU)',
 'exit model(AlexNet).features(Sequential).7(ReLU)',
 'enter model(AlexNet).features(Sequentia

### Check latest tensor stats

In [25]:
te.t.latest_df

Unnamed: 0,shape,mean,std,max,min,module,ts,ttype,tname
0,"[2, 3, 224, 224]",-0.999797,0.577226,-0.000034,-1.999990,model(AlexNet),2020-02-15 13:26:31,input,x
1,"[2, 3, 224, 224]",-0.999797,0.577226,-0.000034,-1.999990,model(AlexNet).features(Sequential),2020-02-15 13:26:31,input,input
2,"[2, 3, 224, 224]",-0.999797,0.577226,-0.000034,-1.999990,model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:26:31,input,input
3,"[64, 3, 11, 11]",-0.000078,0.030249,0.052484,-0.052484,model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:26:31,weight,weight_0
4,"[64, 3, 11, 11]",-0.000004,0.000032,0.000108,-0.000138,model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:26:31,weight_grad,grad_0
...,...,...,...,...,...,...,...,...,...
75,[1000],-0.000238,0.009068,0.015625,-0.015593,model(AlexNet).classifier(Sequential).6(Linear),2020-02-15 13:26:32,weight,weight_1
76,[1000],0.002000,0.000000,0.002000,0.002000,model(AlexNet).classifier(Sequential).6(Linear),2020-02-15 13:26:32,weight_grad,grad_1
77,"[2, 1000]",-0.000153,0.011775,0.035772,-0.031940,model(AlexNet).classifier(Sequential).6(Linear),2020-02-15 13:26:32,output,output_0
78,"[2, 1000]",-0.000153,0.011775,0.035772,-0.031940,model(AlexNet).classifier(Sequential),2020-02-15 13:26:32,output,output_0


### Redefine what you want to record

With the default statistic function, you keep track of the shape, mean, std, max and min of a tensor.

The afore-mentioned tensor could mean all of the following
* module input tensors
* module output tensors
* module weight
* gradient of module weight

If you have more interesting metrics to follow, you can redefine the statistic tracking function

#### Redefine the weight tensor / weight grad tensor statistic function

In [26]:
@te.how_record_weight
def weight_stats(tensor):
    """Number of elements, and the max of each slice along the first dimension."""
    stats = {"num": tensor.numel()}
    stats["row_max"] = [row.max().item() for row in tensor]
    return stats

#### Redefine the input or output statistic function

In [27]:
@te.how_record_in
def input_stats(tensor):
    """Number of elements, and the min of each slice along the first dimension."""
    stats = {"num": tensor.numel()}
    stats["row_min"] = [row.min().item() for row in tensor]
    return stats

@te.how_record_out
def output_stats(tensor):
    """Number of elements, and the min of each slice along the first dimension."""
    stats = {"num": tensor.numel()}
    stats["row_min"] = [row.min().item() for row in tensor]
    return stats

Let's run 1 forward pass again

In [28]:
model(samp)

tensor([[-0.0026,  0.0113,  0.0274,  ..., -0.0106,  0.0031, -0.0232],
        [ 0.0002,  0.0107,  0.0146,  ..., -0.0096, -0.0030, -0.0162]],
       grad_fn=<AddmmBackward>)

The latest stats changed

In [29]:
te.t.latest_df

Unnamed: 0,num,row_min,module,ts,ttype,tname,row_max
0,301056,"[-1.9999864101409912, -1.9999747276306152]",model(AlexNet),2020-02-15 13:25:55,input,x,
1,301056,"[-1.9999864101409912, -1.9999747276306152]",model(AlexNet).features(Sequential),2020-02-15 13:25:55,input,input,
2,301056,"[-1.9999864101409912, -1.9999747276306152]",model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:25:55,input,input,
3,23232,,model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:25:55,weight,weight_0,"[0.05188900977373123, 0.052291177213191986, 0...."
4,23232,,model(AlexNet).features(Sequential).0(Conv2d),2020-02-15 13:25:55,weight_grad,grad_0,"[2.2261992853600532e-05, 4.5778775529470295e-0..."
...,...,...,...,...,...,...,...
75,1000,,model(AlexNet).classifier(Sequential).6(Linear),2020-02-15 13:25:55,weight,weight_1,"[0.004774011671543121, 0.01551712118089199, 0...."
76,1000,,model(AlexNet).classifier(Sequential).6(Linear),2020-02-15 13:25:55,weight_grad,grad_1,"[0.003000000026077032, 0.003000000026077032, 0..."
77,2000,"[-0.036001455038785934, -0.037717852741479874]",model(AlexNet).classifier(Sequential).6(Linear),2020-02-15 13:25:55,output,output_0,
78,2000,"[-0.036001455038785934, -0.037717852741479874]",model(AlexNet).classifier(Sequential),2020-02-15 13:25:55,output,output_0,


## Placing tracker on variables
To be experimented here

In [160]:
w = list(model.features.parameters())[0]

In [161]:
x1 = torch.rand(5,6)
x2 = torch.rand(5,6)
x3 = x1*6+x2

In [162]:
x2.numel()

30

## Placing tracker on optimizer
To be experimented here