# Utilities
> Build utilities for flask

In [1]:
# default_exp utils

In [2]:
# export
import os
from pathlib import Path
import json
import pandas as pd
import numpy as np

In [3]:
# export
# Home directory of the current user. The HOME environment variable is used
# when it is set; otherwise fall back to Path.home() so that importing this
# module does not raise KeyError on platforms that do not define HOME.
HOME = Path(os.environ["HOME"]) if "HOME" in os.environ else Path.home()
# torchember folder under the home directory
EMBER = HOME/".torchember"
# folder that get_ember_list() scans for "base_" metadata files
DATA = EMBER/"data"
# second folder whose presence check_existance() requires
LOG = EMBER/"LOG"
def check_existance():
    """
    Tell whether both torchember folders are present on disk.

    return: True only when DATA and LOG both exist, otherwise False
    (LOG is not looked at when DATA is already missing)
    """
    return all(folder.exists() for folder in (DATA, LOG))
    

Check whether the torchember data and log directories exist. If they do not, first run torchember in your PyTorch modelling code, following the short tutorial.

In [4]:
check_existance()

True

In [5]:
# export
def get_ember_list():
    """
    Collect the names of the "base_" metadata files found in DATA.

    return: None when check_existance() reports missing folders; otherwise
        the list of entry names (not full paths) starting with "base_",
        in the order os.listdir yields them
    """
    if not check_existance():
        return None
    return [entry for entry in os.listdir(DATA) if entry.startswith("base_")]

In [6]:
get_ember_list()[:5]

['base_AlexNet_20200215_132629.json',
 'base_AlexNet_20200226_002834.json',
 'base_AlexNet_20200211_230327.json',
 'base_AlexNet_20200211_192928.json',
 'base_AlexNet_20200209_183014.json']

In [1]:
# export
def unpack_meta(fname):
    """
    Load one metadata JSON file from DATA and tag it with its record name.

    fname: file name inside DATA, expected to look like "base_<name>.json"
    return: the decoded JSON object with an extra "name" key holding fname
        without its first 5 and last 5 characters (the "base_" prefix and
        the ".json" suffix for names of that form)
    """
    # the context manager closes the handle even when the JSON is malformed
    with open(DATA/fname,"r") as f:
        dict_ = json.load(f)
    dict_["name"] = fname[5:-5]
    return dict_

def get_ember_df(ember_list):
    """
    Build a dataframe of tracking record metadata, most recent first.

    ember_list: list of metadata file names as returned by get_ember_list(),
        or None
    return: None when ember_list is None; otherwise a dataframe with one row
        per file (the dict produced by unpack_meta), sorted by the "start"
        column in descending order and re-indexed from 0. All records are
        returned, not only the first few. When there is nothing to list the
        (empty) dataframe is returned unsorted.
    """
    if ember_list is None: return None
    df = pd.DataFrame([unpack_meta(i) for i in ember_list])
    # without any record there is no "start" column to sort by
    if df.empty: return df
    df = df.sort_values(by = "start",ascending = False)
    return df.reset_index(drop=True)

def get_ember_record():
    """
    Return the metadata of every tracking record as a list of dicts.

    Each dict is one row of get_ember_df() plus a "latest" key holding the
    row position in that dataframe (0 for the first row).

    return: {} when get_ember_list() returns None (kept as a dict for
        backward compatibility), [] when no metadata file is listed,
        otherwise the list of record dicts
    """
    ember_list = get_ember_list()
    if ember_list is None:  return {}
    # no metadata files: nothing to tabulate
    if len(ember_list) == 0: return []
    ember_df = get_ember_df(ember_list)

    ember_df["latest"] = list(range(len(ember_df)))
    # "records" is the documented orient value; the abbreviation "record"
    # is rejected by pandas 2.x
    return ember_df.to_dict(orient="records")

In [14]:
df = get_ember_df(get_ember_list())
df.head(5)

Unnamed: 0,start,user,name
0,2020-03-07 10:37:37,salvor,AlexNet_20200307_103737
1,2020-03-07 10:35:52,salvor,AlexNet_20200307_103552
2,2020-03-05 23:12:37,salvor,AlexNet_20200305_231237
3,2020-03-05 23:10:04,salvor,AlexNet_20200305_231004
4,2020-03-03 23:50:54,salvor,AlexNet_20200303_235054


In [15]:
get_ember_record()

[{'start': '2020-03-07 10:37:37',
  'user': 'salvor',
  'name': 'AlexNet_20200307_103737',
  'latest': 0},
 {'start': '2020-03-07 10:35:52',
  'user': 'salvor',
  'name': 'AlexNet_20200307_103552',
  'latest': 1},
 {'start': '2020-03-05 23:12:37',
  'user': 'salvor',
  'name': 'AlexNet_20200305_231237',
  'latest': 2},
 {'start': '2020-03-05 23:10:04',
  'user': 'salvor',
  'name': 'AlexNet_20200305_231004',
  'latest': 3},
 {'start': '2020-03-03 23:50:54',
  'user': 'salvor',
  'name': 'AlexNet_20200303_235054',
  'latest': 4},
 {'start': '2020-03-03 23:44:40',
  'user': 'salvor',
  'name': 'AlexNet_20200303_234440',
  'latest': 5},
 {'start': '2020-03-03 23:41:30',
  'user': 'salvor',
  'name': 'AlexNet_20200303_234130',
  'latest': 6},
 {'start': '2020-03-03 23:39:23',
  'user': 'salvor',
  'name': 'AlexNet_20200303_233923',
  'latest': 7},
 {'start': '2020-02-26 00:28:34',
  'user': 'salvor',
  'name': 'AlexNet_20200226_002834',
  'latest': 8},
 {'start': '2020-02-26 00:25:28',
  '

### Ember Reader
A reading handler to process Torch Ember data

In [9]:
# export
from torchember.helper import emberTracker

class emberReader(object):
    """
    Reading handler for the data torchember recorded under one record name.

    Attributes
    * name: the record name given to the constructor
    * t: the emberTracker built from that name
    * structure: value of t["structure_<name>"]
    * base: value of t["base_<name>"]
    * verbose: flag given to the constructor
    """
    def __init__(self, name,verbose = False):
        """
        name: name of the tracking record to open
        verbose: when True, print the tracker's log_files
        """
        self.verbose = verbose
        self.name = name
        self.t = emberTracker(name)
        self.structure = self.t[f"structure_{self.name}"]
        self.base = self.t[f"base_{self.name}"]
        if self.verbose:print(self.t.log_files)

    @property
    def latest(self):
        """
        The tracker's latest_df as a list of dicts, one dict per row
        """
        # "records" is the documented orient value; the abbreviation
        # "record" is rejected by pandas 2.x
        return self.t.latest_df.to_dict(orient = "records")

    def read_file(self,filename):
        """
        Return the whole text of a file located under the tracker's log_path
        """
        # the context manager closes the handle instead of leaking it
        with open(self.t.log_path/filename,"r") as f:
            return f.read()

    def read_log(self,log_name):
        """
        Return a log file as the text of a JSON array: the first character
        of the file is dropped and the remainder is wrapped in "[" and "]"
        (presumably every entry is written with a leading comma -- confirm
        against the code that writes the logs)
        """
        return "["+self.read_file(log_name)[1:]+"]"

    def json_df(self,log_name):
        """
        Parse the text returned by read_log as JSON and return it as a dataframe
        """
        return pd.DataFrame(json.loads(self.read_log(log_name)))

In [10]:
er = emberReader("AlexNet_20200307_103737",verbose = True)

['init-00_phase-valid_epoch-0.log', 'init-00_phase-train_epoch-0.log', 'init-00_phase-train_epoch-1.log', 'init-00_phase-valid_epoch-1.log']


In [11]:
er.structure

{'name': 'model(AlexNet)',
 'short': 'model(AlexNet)',
 'children': [{'name': 'model(AlexNet).features(Sequential)',
   'short': 'features(Sequential)',
   'children': [{'name': 'model(AlexNet).features(Sequential).0(Conv2d)',
     'short': '0(Conv2d)'},
    {'name': 'model(AlexNet).features(Sequential).1(ReLU)',
     'short': '1(ReLU)'},
    {'name': 'model(AlexNet).features(Sequential).2(MaxPool2d)',
     'short': '2(MaxPool2d)'},
    {'name': 'model(AlexNet).features(Sequential).3(Conv2d)',
     'short': '3(Conv2d)'},
    {'name': 'model(AlexNet).features(Sequential).4(ReLU)',
     'short': '4(ReLU)'},
    {'name': 'model(AlexNet).features(Sequential).5(MaxPool2d)',
     'short': '5(MaxPool2d)'},
    {'name': 'model(AlexNet).features(Sequential).6(Conv2d)',
     'short': '6(Conv2d)'},
    {'name': 'model(AlexNet).features(Sequential).7(ReLU)',
     'short': '7(ReLU)'},
    {'name': 'model(AlexNet).features(Sequential).8(Conv2d)',
     'short': '8(Conv2d)'},
    {'name': 'model(AlexN

Basic information

In [28]:
er.base

{'start': '2020-03-07 10:37:37', 'user': 'salvor'}

List the log files in the log directory of this tracked run

In [29]:
er.t.log_files

['init-00_phase-valid_epoch-0.log',
 'init-00_phase-train_epoch-0.log',
 'init-00_phase-train_epoch-1.log',
 'init-00_phase-valid_epoch-1.log']

Latest record as dataframe

In [30]:
er.t.latest_df

Unnamed: 0,shape,mean,std,max,min,cnt_zero,zero_pct,module,ts,ttype,tname,n_batch
0,"[2, 3, 224, 224]",-1.001885,0.577536,-0.000016,-1.999997,0,0.0,model(AlexNet),2020-03-07 10:37:52,input,x,1
1,"[2, 3, 224, 224]",-1.001885,0.577536,-0.000016,-1.999997,0,0.0,model(AlexNet).features(Sequential),2020-03-07 10:37:52,input,input,1
2,"[2, 3, 224, 224]",-1.001885,0.577536,-0.000016,-1.999997,0,0.0,model(AlexNet).features(Sequential).0(Conv2d),2020-03-07 10:37:52,input,input,1
3,"[64, 3, 11, 11]",-0.000152,0.030288,0.052461,-0.052483,0,0.0,model(AlexNet).features(Sequential).0(Conv2d),2020-03-07 10:37:52,weight,weight_0,1
4,[64],0.003515,0.027836,0.051731,-0.044360,0,0.0,model(AlexNet).features(Sequential).0(Conv2d),2020-03-07 10:37:52,weight,weight_1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
59,"[1000, 4096]",0.000008,0.009020,0.015625,-0.015625,0,0.0,model(AlexNet).classifier(Sequential).6(Linear),2020-03-07 10:37:53,weight,weight_0,1
60,[1000],0.000555,0.008901,0.015613,-0.015598,0,0.0,model(AlexNet).classifier(Sequential).6(Linear),2020-03-07 10:37:53,weight,weight_1,1
61,"[2, 1000]",0.000637,0.011510,0.033282,-0.033744,0,0.0,model(AlexNet).classifier(Sequential).6(Linear),2020-03-07 10:37:53,output,output_0,1
62,"[2, 1000]",0.000637,0.011510,0.033282,-0.033744,0,0.0,model(AlexNet).classifier(Sequential),2020-03-07 10:37:53,output,output_0,1


Load a log file as a long list of dictionaries

In [35]:
json.loads(er.read_log('init-00_phase-valid_epoch-1.log'))

[{'shape': [2, 3, 224, 224],
  'mean': -1.001885175704956,
  'std': 0.5775364637374878,
  'max': -1.5735626220703125e-05,
  'min': -1.9999974966049194,
  'cnt_zero': 0,
  'zero_pct': 0.0,
  'module': 'model(AlexNet)',
  'ts': '2020-03-07 10:37:50',
  'ttype': 'input',
  'tname': 'x',
  'n_batch': 0},
 {'shape': [2, 3, 224, 224],
  'mean': -1.001885175704956,
  'std': 0.5775364637374878,
  'max': -1.5735626220703125e-05,
  'min': -1.9999974966049194,
  'cnt_zero': 0,
  'zero_pct': 0.0,
  'module': 'model(AlexNet).features(Sequential)',
  'ts': '2020-03-07 10:37:50',
  'ttype': 'input',
  'tname': 'input',
  'n_batch': 0},
 {'shape': [2, 3, 224, 224],
  'mean': -1.001885175704956,
  'std': 0.5775364637374878,
  'max': -1.5735626220703125e-05,
  'min': -1.9999974966049194,
  'cnt_zero': 0,
  'zero_pct': 0.0,
  'module': 'model(AlexNet).features(Sequential).0(Conv2d)',
  'ts': '2020-03-07 10:37:50',
  'ttype': 'input',
  'tname': 'input',
  'n_batch': 0},
 {'shape': [64, 3, 11, 11],
  'mea

Return log file as dataframe

In [31]:
er.json_df('init-00_phase-valid_epoch-1.log')

Unnamed: 0,shape,mean,std,max,min,cnt_zero,zero_pct,module,ts,ttype,tname,n_batch
0,"[2, 3, 224, 224]",-1.001885,0.577536,-0.000016,-1.999997,0,0.0,model(AlexNet),2020-03-07 10:37:50,input,x,0
1,"[2, 3, 224, 224]",-1.001885,0.577536,-0.000016,-1.999997,0,0.0,model(AlexNet).features(Sequential),2020-03-07 10:37:50,input,input,0
2,"[2, 3, 224, 224]",-1.001885,0.577536,-0.000016,-1.999997,0,0.0,model(AlexNet).features(Sequential).0(Conv2d),2020-03-07 10:37:50,input,input,0
3,"[64, 3, 11, 11]",-0.000152,0.030288,0.052461,-0.052483,0,0.0,model(AlexNet).features(Sequential).0(Conv2d),2020-03-07 10:37:50,weight,weight_0,0
4,[64],0.003515,0.027836,0.051731,-0.044360,0,0.0,model(AlexNet).features(Sequential).0(Conv2d),2020-03-07 10:37:50,weight,weight_1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
123,"[1000, 4096]",0.000008,0.009020,0.015625,-0.015625,0,0.0,model(AlexNet).classifier(Sequential).6(Linear),2020-03-07 10:37:53,weight,weight_0,1
124,[1000],0.000555,0.008901,0.015613,-0.015598,0,0.0,model(AlexNet).classifier(Sequential).6(Linear),2020-03-07 10:37:53,weight,weight_1,1
125,"[2, 1000]",0.000637,0.011510,0.033282,-0.033744,0,0.0,model(AlexNet).classifier(Sequential).6(Linear),2020-03-07 10:37:53,output,output_0,1
126,"[2, 1000]",0.000637,0.011510,0.033282,-0.033744,0,0.0,model(AlexNet).classifier(Sequential),2020-03-07 10:37:53,output,output_0,1


Latest JSON data

In [32]:
er.latest

[{'shape': [2, 3, 224, 224],
  'mean': -1.001885175704956,
  'std': 0.5775364637374878,
  'max': -1.5735626220703125e-05,
  'min': -1.9999974966049194,
  'cnt_zero': 0,
  'zero_pct': 0.0,
  'module': 'model(AlexNet)',
  'ts': '2020-03-07 10:37:52',
  'ttype': 'input',
  'tname': 'x',
  'n_batch': 1},
 {'shape': [2, 3, 224, 224],
  'mean': -1.001885175704956,
  'std': 0.5775364637374878,
  'max': -1.5735626220703125e-05,
  'min': -1.9999974966049194,
  'cnt_zero': 0,
  'zero_pct': 0.0,
  'module': 'model(AlexNet).features(Sequential)',
  'ts': '2020-03-07 10:37:52',
  'ttype': 'input',
  'tname': 'input',
  'n_batch': 1},
 {'shape': [2, 3, 224, 224],
  'mean': -1.001885175704956,
  'std': 0.5775364637374878,
  'max': -1.5735626220703125e-05,
  'min': -1.9999974966049194,
  'cnt_zero': 0,
  'zero_pct': 0.0,
  'module': 'model(AlexNet).features(Sequential).0(Conv2d)',
  'ts': '2020-03-07 10:37:52',
  'ttype': 'input',
  'tname': 'input',
  'n_batch': 1},
 {'shape': [64, 3, 11, 11],
  'mea

## io cleaner

In [3]:
import torch

In [24]:
# export
from torch import is_tensor
def clean_kv(k,v):
    """
    Flatten one named value into a {name: tensor} dictionary.

    * a tensor is returned as {k: v}
    * a dict is walked recursively, each key vk extending the name to "k.vk"
    * a list, tuple or set is walked recursively, the i-th element extending
      the name to "k.tsr{i}" (for a set, i follows the set's iteration order)
    * any other value is dropped, giving {}

    Only exact dict/list/tuple/set instances are unpacked; subclasses are
    treated as "any other value".
    """
    if is_tensor(v):
        return {k:v}
    rt = dict()
    if type(v)==dict:
        for vk,vv in v.items():
            rt.update(clean_kv(f"{k}.{vk}",vv))
    elif type(v) in [list,tuple,set]:
        # enumerate rather than v[i]: a set cannot be indexed
        for i,vv in enumerate(v):
            rt.update(clean_kv(f"{k}.tsr{i}",vv))
    return rt
    
def io_cleaner(**kwargs):
    """
    Flatten tensor inputs/outputs given as keyword arguments
    into one flat dictionary of k,v
    * k is the name built by clean_kv from the keyword and the nesting path
    * v is the value clean_kv kept under that name
    Every keyword argument goes through clean_kv, which breaks down
    list, tuple, set and dictionary containers at any level;
    the per-argument results are merged in argument order
    return dictionary
    """
    flat = {}
    for name, value in kwargs.items():
        for key, kept in clean_kv(name, value).items():
            flat[key] = kept
    return flat
        

Test for a very bizarre case of input/output

In [25]:
# Five random 2x2 tensors used as the leaves of the nested arguments
a = torch.rand(2,2)
b = torch.rand(2,2)
c = torch.rand(2,2)
d = torch.rand(2,2)
e = torch.rand(2,2)
# Mix a bare tensor, a tuple, a dict, and a dict holding a list that holds a dict
result = io_cleaner(a = a, 
                   b= (b,c),
                   d = {"f":d},
                   e = {"e1":[a,b,{"e2":e}]})
# Show the flattened names and whether each flattened value is a tensor
result.keys(), list(is_tensor(v) for v in result.values())

(dict_keys(['a', 'b.tsr0', 'b.tsr1', 'd.f', 'e.e1.tsr0', 'e.e1.tsr1', 'e.e1.tsr2.e2']),
 [True, True, True, True, True, True, True])