# DocTour 

In [24]:
import pandas as pd
import inspect

In [25]:
class singleDoc(object):
    def __init__(self, name,doc="",level = -1):
        self.name = name.split(".")[-1]
        self.doc = doc if doc else ""
        self.parents = []
        self.kids = []
        self.names = []
        self.names.append(name)
        self.level = level
        self.source = ""
        
    def __repr__(self):
        return f"<{self.name}>"
#         return f"""
#         [Name:\t{self.name}][Level:\t{self.level}][Names:{len(self.names)}]
#         """
    
    def new_parent(self,parent):
        if parent:
            self.parents.append(parent)
            parent.kids.append(self)
            
    def new_name(self,nname):
        self.names.append(nname)
        
    def to_dicts(self,*cols):
        return dict((col,getattr(self,col)) for col in cols)


In [26]:
def mv_attr(*args):
    args_kv = dict((a,f"__{a}__") for a in args)
    def move(src,dst):
        for da,sa in args_kv.items():
            if hasattr(src, sa):
                setattr(dst, da, getattr(src, sa))
    return move

def get_source(obj):
    try: return inspect.getsource(obj)
    except: return ""

In [27]:
moves = mv_attr("doc")

In [28]:
class docTour(object):
    def __init__(self,root_obj,root_name, load_source = False):
        self.docs = dict()
        self.root_obj = root_obj
        self.load_source = load_source
        self.doc_parser(self.root_obj,root_name,name_chain = root_name)
        self.df = self.to_df()
        
    def mid(self,obj):
        """
        memory address
        """
        return hex(id(obj))
    
    def sort_score(self,df,score_field = "score"):
        return df.sort_values(by = score_field, ascending = False)
    
    def sch_name_short(self, nm):
        return self.sort_score(self.df[self.df.name.str.contains(nm)])
    
    def to_df(self):
        df = pd.DataFrame(list(i.to_dicts("name","doc","parents","kids","names","level","source") for i in self.docs.values()))
        df["p_ct"] = df.parents.apply(len)
        df["k_ct"] = df.kids.apply(len)
        df["score"] = df["k_ct"]+df["p_ct"] - df["level"]*2
        return df
                        
    def doc_parser(self,obj,name,level = 0,name_chain = "",parent = None):
        """
        Parse the sub structure of an object and tracing its documentation
        obj: python class/ object /function
        name: str, name of the object
        level:int, level count from the root obj
        name_chain: str
        parent: singleDoc,
        """
        addr = self.mid(obj)
        if addr in self.docs:
            if type(self.docs[addr])==singleDoc:
                sd = self.docs[addr]
                sd.new_name(name_chain)
                sd.new_parent(parent)
            return None
        
        if hasattr(obj,"__doc__"):
            sd = singleDoc(name_chain,level = level,)
            moves(obj,sd)
            sd.new_parent(parent)
            if self.load_source:
                sd.source = get_source(obj)
            self.docs[addr] = sd

        for attr_name in dir(obj):
            sub_obj = getattr(obj,attr_name)
            name_chain_ = name_chain+"."+attr_name
            if self.mid(sub_obj) in self.docs:
                sd = self.docs[self.mid(sub_obj)]
                sd.new_name(name_chain_)
                sd.new_parent(self.docs[addr])
                if self.load_source:
                    sd.source = get_source(sub_obj)
                continue
            elif "__" not in attr_name:
                if level<6:
                    try:
                        self.doc_parser(getattr(obj,attr_name),attr_name,level=level+1,name_chain = name_chain_,parent = self.docs[addr])
                    except Exception as e:
                        print(f"[ERROR]>>{name_chain_}")

In [29]:
import tensorflow

In [30]:
%time dt = docTour(tensorflow,"tensorflow")

[ERROR]>>tensorflow._api.v2.audio._sys.displayhook._
[ERROR]>>tensorflow._api.v2.compat.v1.AttrValue.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.ConfigProto.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.Event.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.GPUOptions.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.GraphDef.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.GraphOptions.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.HistogramProto.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.LogMessage.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.MetaGraphDef.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.NameAttrList.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.NodeDef.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.OptimizerOptions.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.RunMetadata.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.RunOptions.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.SessionLog.DESCRIPTOR
[ERROR]>>tensorflow._api.v2.compat.v1.Su



[ERROR]>>tensorflow._importlib._bootstrap._bootstrap_external.builtins.type
[ERROR]>>tensorflow._importlib.abc.abc.ABCMeta
CPU times: user 662 ms, sys: 17.2 ms, total: 679 ms
Wall time: 689 ms


In [31]:
gen = iter(dt.docs.items())

In [32]:
next(gen)

('0x120f49a70', <tensorflow>)

In [33]:
print(len(dt.docs))

9137


In [34]:
stats = dt.to_df()

In [35]:
stats.sort_values(by="p_ct",ascending=False)

Unnamed: 0,name,doc,parents,kids,names,level,source,p_ct,k_ct,score
1140,FromString,Creates new method instance from given seriali...,"[<Event>, <Event>, <_tf_api_names_v1>, <_tf_ap...","[<isupper>, <isupper>, <isupper>, <_decorator_...",[tensorflow._api.v2.compat.v1.Event.FromString...,6,,20956,7,20951
11,_decorator_argspec,,"[<_tf_decorator>, <_decorator_argspec>, <maket...","[<_decorator_argspec>, <bit_length>, <bit_leng...",[tensorflow.Assert._tf_decorator._decorator_ar...,3,,15674,4,15672
605,isupper,Return True if the string is an uppercase stri...,"[<_decorator_doc>, <_decorator_doc>, <_decorat...","[<bit_length>, <bit_length>, <bit_length>, <_d...",[tensorflow.TensorArray.close._tf_decorator._d...,5,,7430,6,7426
1268,FromString,Creates new method instance from given seriali...,"[<GraphOptions>, <GraphOptions>, <_extensions_...","[<FromString>, <FromString>, <FromString>, <_d...",[tensorflow._api.v2.compat.v1.GraphOptions.Fro...,6,,3316,10,3314
47,deleter,Descriptor to change the deleter on a property.,"[<is_numpy_compatible>, <fget>, <fget>, <fget>...","[<_decorator_argspec>, <is_numpy_compatible>, ...","[tensorflow.DType.is_numpy_compatible.deleter,...",3,,2585,3,2582
...,...,...,...,...,...,...,...,...,...,...
4614,cosine_decay,Applies cosine decay to the learning rate.\n\n...,[<train>],"[<_decorator_argspec>, <_decorator_argspec>, <...",[tensorflow._api.v2.compat.v1.train.cosine_decay],6,,1,3,-8
4612,checkpoint_exists,Checks whether a V1 or V2 checkpoint exists wi...,[<train>],"[<_decorator_argspec>, <_decorator_argspec>, <...",[tensorflow._api.v2.compat.v1.train.checkpoint...,6,,1,3,-8
4611,batch_join,Runs a list of tensors to fill a queue to crea...,[<train>],"[<_decorator_argspec>, <_decorator_argspec>]",[tensorflow._api.v2.compat.v1.train.batch_join],6,,1,2,-9
4610,batch,Creates batches of tensors in `tensors`. (depr...,[<train>],"[<_decorator_argspec>, <_decorator_argspec>]",[tensorflow._api.v2.compat.v1.train.batch],6,,1,2,-9


dt.docs

In [36]:
dt.sch_name_short("json")

Unnamed: 0,name,doc,parents,kids,names,level,source,p_ct,k_ct,score
1879,decode_json_example,Convert JSON-encoded Example records to binary...,"[<v1>, <io>, <v1>, <io>, <io>]","[<_decorator_argspec>, <FromString>, <FromStri...",[tensorflow._api.v2.compat.v1.decode_json_exam...,5,,5,10,5
8817,model_from_json,Parses a JSON model configuration file and ret...,[<models>],"[<FunctionType>, <_decorator_argspec>, <_decor...",[tensorflow.keras.models.model_from_json],3,,1,8,3
7780,to_json,Returns a JSON string containing the network c...,"[<LinearModel>, <WideDeepModel>]","[<FunctionType>, <_decorator_argspec>, <_decor...",[tensorflow.keras.experimental.LinearModel.to_...,4,,2,8,2
9004,to_json,Returns a JSON string containing the timeserie...,[<TimeseriesGenerator>],"[<FunctionType>, <_decorator_argspec>, <_decor...",[tensorflow.keras.preprocessing.sequence.Times...,5,,1,8,-1
9023,to_json,Returns a JSON string containing the tokenizer...,[<Tokenizer>],"[<FunctionType>, <_decorator_argspec>, <_decor...",[tensorflow.keras.preprocessing.text.Tokenizer...,5,,1,7,-2


In [75]:
inspect.getfile(ftorch)

'/Users/salvor/anaconda3/lib/python3.7/site-packages/forgebox/ftorch/__init__.py'

In [89]:
print(inspect.getclasstree([ftorch.train.Trainer]))

[(<class 'forgebox.train.Trainer'>, (<class 'object'>,)), [(<class 'forgebox.ftorch.train.Trainer'>, (<class 'forgebox.train.Trainer'>,))]]


In [69]:
import forgebox

In [74]:
from forgebox import ftorch

In [38]:
import importlib

In [43]:
importlib.import_module("pandas")

ModuleNotFoundError: No module named 'pandas.DataFrame'