# Parsing Python
> The core foundation of this entire package

## SQLAlchemy models/objects

We use the SQLAlchemy ORM to manage a single target.

In [None]:
from sqlalchemy import Column, Text, Integer, ForeignKey
from flask_appbuilder import Model
from sqlalchemy.orm import relationship

A docModel can represent a class, a function, or a module (all Python).

In [None]:
class docModel(Model):
    """
    One documented python target, stored as a row of the ``docs`` table.
    """
    __tablename__ = "docs"
    # Primary key is supplied by the caller (autoincrement is switched off).
    id = Column(Integer, primary_key=True, autoincrement=False)
    name = Column(Text())
    doc = Column(Text(), default="")
    # Comma separated string, e.g. "abc,ABC" in the example below.
    names = Column(Text(), default="")
    level = Column(Integer(), default=-1)
    source = Column(Text(), default="")
    path = Column(Text(), default="")
    code = Column(Text(), default="")
    ctype = Column(Text(), default="")
    alias = Column(Text(), default="")

    def __repr__(self):
        """Short representation: the name wrapped in angle brackets."""
        return f"<{self.name}>"

    def new_parent(self, parent):
        """
        Link ``parent`` and this object in both directions.

        Does nothing when ``parent`` is falsy. Relies on the ``parents`` and
        ``kids`` attributes being present on the class.
        """
        if not parent:
            return
        self.parents.append(parent)
        parent.kids.append(self)

    def to_dicts(self, *cols):
        """
        Return a single dict mapping each requested attribute name to its value.

        :param cols: attribute names to read from this object
        """
        return {col: getattr(self, col) for col in cols}

Test a docModel

In [None]:
# Build a docModel in memory and display it through its __repr__
abcdoc = docModel(name="abc",names="abc,ABC", level=2, ctype="function",alias="abc")
abcdoc

<abc>

In [None]:
# to_dicts returns one dict holding just the requested attributes
abcdoc.to_dicts("name", "names", "level", "ctype", "alias")

{'name': 'abc',
 'names': 'abc,ABC',
 'level': 2,
 'ctype': 'function',
 'alias': 'abc'}

## Relations between targets
There are two kinds of relation between two docModels:

### Attributes relation
b is an attribute of a: we can get b with ```a.b``` or ```getattr(a,"b")```

In [None]:
class docGraphModel(Model):
    """
    One row of the ``doc_graph`` table: a directed parent -> kid link between two ``docs`` rows.
    """
    __tablename__ = "doc_graph"
    id = Column(Integer, primary_key=True)
    # Both ends reference docs.id, so each relationship names its own foreign key explicitly.
    parent_id = Column(Integer(), ForeignKey("docs.id"))
    parent = relationship(docModel, foreign_keys=[parent_id])
    kid_id = Column(Integer(), ForeignKey("docs.id"))
    kid = relationship(docModel, foreign_keys=[kid_id])
    
# Map the self-referential attribute relations of docModel through the "doc_graph" table.
# kids: docModels linked by doc_graph rows whose parent_id is this docModel's id.
docModel.kids = relationship(docModel,
                             secondary="doc_graph",
                             primaryjoin=(docModel.id == docGraphModel.parent_id),
                             secondaryjoin=(docGraphModel.kid_id == docModel.id)
                             )
# parents: the mirror image, docModels linked by doc_graph rows whose kid_id is this docModel's id.
docModel.parents = relationship(docModel,
                                secondary="doc_graph",
                                primaryjoin=(docModel.id == docGraphModel.kid_id),
                                secondaryjoin=(docGraphModel.parent_id == docModel.id)
                                )

### Inheritance relation

We define inheritance as follows:

In [None]:
class b(object):
    """Top of the example hierarchy; contributes ``fromb``."""
    fromb = "b"


class a(b):
    """Derives from b and contributes ``froma``."""
    froma = "a"


class c(a, b):
    """Derives from both a and b and contributes ``fromc``."""
    fromc = "c"

In this case we can say:
* b is a descendant of object
* a is a descendant of b
* c is a descendant of both a and b
* b is an ancestor of a

In [None]:
# c can read the attributes defined on its bases a and b as well as its own
c.froma, c.fromb,c.fromc

('a', 'b', 'c')

In [None]:
class inhGraphModel(Model):
    """
    One row of the ``inh_graph`` table: a directed ancestor -> descendant link between two ``docs`` rows.
    """
    __tablename__ = "inh_graph"
    id = Column(Integer, primary_key=True)
    # Both ends reference docs.id, so each relationship names its own foreign key explicitly.
    anc_id = Column(Integer(), ForeignKey("docs.id")) # ancestor
    anc = relationship(docModel, foreign_keys=[anc_id])
    des_id = Column(Integer(), ForeignKey("docs.id")) # descendant
    des = relationship(docModel, foreign_keys=[des_id])

# Map the self-referential class-inheritance relations of docModel through the "inh_graph" table.
# dess: docModels linked by inh_graph rows whose anc_id is this docModel's id (its descendants).
docModel.dess = relationship(docModel,
                             secondary="inh_graph",
                             primaryjoin=(docModel.id == inhGraphModel.anc_id),
                             secondaryjoin=(inhGraphModel.des_id == docModel.id)
                             )
# ancs: the mirror image, docModels linked by inh_graph rows whose des_id is this docModel's id (its ancestors).
docModel.ancs = relationship(docModel,
                                secondary="inh_graph",
                                primaryjoin=(docModel.id == inhGraphModel.des_id),
                                secondaryjoin=(inhGraphModel.anc_id == docModel.id)
                                )

## Parse a class, function, module
We record its attributes, the objects it is an attribute of, and its inheritance relations.

In [None]:
# default_exp base.parse

In [None]:
# export
from doctour.base.doc import docModel, docGraphModel, inhGraphModel

In [None]:
# export
# hide
import pandas as pd
import inspect

import logging
from sqlalchemy import create_engine as ce
from sqlalchemy.orm.session import sessionmaker
import os

In [None]:
# export
# Session factory created without an engine; parse_lib binds one per call through Session(bind=...).
Session = sessionmaker()
# Directory for the generated SQLite files: "/app/dbs" appended to the parent of the directory holding this module.
basedir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))+"/app/dbs"

In [None]:
# hide
# Notebook-only cell (flagged "hide" rather than "export"): rebuilds the session factory for interactive runs.
Session = sessionmaker()
# Notebook-only output directory: the current user's Downloads folder.
# Derived from the home directory instead of a hard-coded personal path, so the
# notebook can be re-run by someone other than the original author.
basedir = os.path.join(os.path.expanduser("~"), "Downloads")

### Some helper functions

In [None]:
# export
def most_frequent(List):
    """
    Return the element that occurs most often in ``List``.

    Ties are resolved in favour of the element that appears first, so the
    result does not depend on set/hash ordering and is stable between runs.

    :param List: iterable of hashable items
    :return: the most frequent item
    :raises ValueError: when ``List`` is empty
    """
    from collections import Counter

    counts = Counter(List)
    # Counter keeps first-seen order and max() returns the first maximum it meets.
    return max(counts, key=counts.get)

def get_source(obj):
    """
    Best-effort source lookup.

    :param obj: any python object
    :return: the text returned by ``inspect.getsource``, or "" when it fails
    """
    try:
        return inspect.getsource(obj)
    except Exception:
        # Stay best-effort for ordinary failures, but let KeyboardInterrupt /
        # SystemExit through so a long crawl can still be interrupted.
        return ""

def get_path(obj):
    """
    Best-effort lookup of the file an object was defined in.

    :param obj: any python object
    :return: the path returned by ``inspect.getsourcefile``, or "" when the
        lookup fails or yields nothing
    """
    try:
        # getsourcefile may return None; normalise that to the same "" used for failures.
        return inspect.getsourcefile(obj) or ""
    except Exception:
        # Best-effort for ordinary failures; KeyboardInterrupt / SystemExit still propagate.
        return ""

def checks(obj):
    """
    Classify ``obj`` as one of the kinds this package records.

    :param obj: any python object
    :return: "module", "class" or "function"; False for anything else
    """
    # Probed in this order; the first predicate that matches decides the label.
    kinds = (
        (inspect.ismodule, "module"),
        (inspect.isclass, "class"),
        (inspect.isfunction, "function"),
    )
    for predicate, label in kinds:
        if predicate(obj):
            return label
    return False

def refresh_table(model,engine):
    """
    Drop the model's table from the database if it exists, then create it anew.

    Uses ``drop(..., checkfirst=True)`` instead of ``Table.exists()``, which is
    deprecated in SQLAlchemy 1.4 and no longer available in 2.0. Because the
    existence check now happens inside ``drop``, the first log line is emitted
    unconditionally.

    :param model: mapped class exposing ``__table__``
    :param engine: engine (bind) the DDL is executed against
    """
    table = model.__table__
    logging.info(f"dropping table if it exists: {str(table)}")
    table.drop(engine, checkfirst=True)
    logging.info(f"creating a new table: {str(table)}")
    table.create(engine)

### The core mechanism

The core mechanism of this entire package

In [None]:
# export
class docTour(object):
    """
    Walk a python object and record what is reachable from it.

    Starting at ``root_obj`` the tour follows attributes (``dir`` / ``getattr``)
    and class bases. Every module, class or function found becomes a docModel,
    attribute links become docGraphModel rows and base-class links become
    inhGraphModel rows. Everything is added to ``sess`` and committed at the end
    of ``__init__``; the collected docModels are also exposed as ``self.df``.
    """

    # Default depth limit: attributes of an object are explored only while its level is below this.
    max_level = 6

    def __init__(self, root_obj, root_name,sess, load_source=False, max_level=6):
        """
        :param root_obj: module, class or function to start from
        :param root_name: name recorded for the root and used as prefix of every name chain
        :param sess: sqlalchemy session that receives the rows and is committed here
        :param load_source: when True, store the source text of each object as well
        :param max_level: attributes are explored only for objects whose level is below this value
        """
        self.docs = dict()        # id(obj) -> docModel, for every object recorded so far
        self.inh_cache = set()    # ids of classes whose bases were already recorded
        self.sd_list = list()     # docModels in creation order
        self.root_obj = root_obj
        self.load_source = load_source
        self.max_level = max_level
        self.sess = sess  # sqla session
        self.doc_parser(self.root_obj, root_name, name_chain=root_name)
        self.vote_name()
        logging.info(f"{len(self.docs)} things found")
        logging.info(f"saving [{root_name}] basics to db")
        self.sess.commit()
        logging.info(f"creating df for [{root_name}]")
        self.df = self.to_df()

    def __len__(self):
        """Number of distinct objects recorded."""
        return len(self.docs)

    def mid(self, obj):
        """
        memory address
        """
        return id(obj)

    def to_df(self):
        """Return the recorded docModels as a pandas DataFrame, one row per object."""
        df = pd.DataFrame(
            list(i.to_dicts("id","name", "doc", "path", "names", "level", "source","ctype","alias") for i in self.docs.values()))
        return df

    def new_doc(self, name_chain, kid, parent, level,ctype):
        """
        New docModel object
        :param name_chain: name chain string
        :param kid: the subject new  obj
        :param parent: parent docModel, or None when there is none
        :param level: int, level from root obj
        :param ctype: "module", "class" or "function"
        :return: docModel object
        """
        sd = docModel(id=self.mid(kid), name=name_chain.split(".")[-1],
                      names=name_chain, level=level, doc=str(kid.__doc__),
                      ctype = ctype,
                      )
        if ctype=="function":
            path = kid.__code__.co_filename
            if path[-3:] == ".py":
                sd.path = path
                # get_source yields "" when the source cannot be read, so an
                # unreadable file no longer aborts the recording of this function.
                sd.code = get_source(kid)
        else:
            sd.path = get_path(kid)
        if ctype=="class":
            self.parse_inh(kid)
        if self.load_source:
            sd.source = get_source(kid)

        self.sess.add(sd)
        self.sd_list.append(sd)

        if parent:
            dg = docGraphModel(parent_id=parent.id, kid_id=sd.id)
            self.sess.add(dg)
        return sd

    def parse_inh(self,obj):
        """
        Record the base classes of ``obj`` (recursively) as inhGraphModel rows.

        Each base that has no docModel yet is also parsed, under its own class name.
        :param obj: the class whose ``__bases__`` are walked
        """
        addr = self.mid(obj)
        if addr in self.inh_cache:
            return None
        bases = getattr(obj, "__bases__", None)
        if not bases:
            return None

        for base in bases:
            # The base must be registered under ITS OWN name, not the name of
            # the descendant we arrived from.
            base_name = base.__name__
            ig = inhGraphModel(anc_id=self.mid(base), des_id=addr)
            self.sess.add(ig)
            self.parse_inh(base)
            if self.mid(base) not in self.docs:
                self.doc_parser(base, base_name, level=1, name_chain=base_name, parent=None)

        self.inh_cache.add(addr)

    def doc_parser(self, obj, name, level=0, name_chain="", parent=None):
        """
        Parse the sub structure of an object and tracing its documentation
        obj: python class/ object /function
        name: str, name of the object
        level:int, level count from the root obj
        name_chain: str
        parent: docModel,
        """
        addr = self.mid(obj)
        ctype = checks(obj)
        if not ctype: return None

        if addr in self.docs:
            # Already recorded: remember the extra name and link it to this parent.
            sd = self.docs[addr]
            sd.names = sd.names +"," + name_chain
            self.sess.add(sd)
            if parent:
                dg = docGraphModel(parent_id=parent.id, kid_id=sd.id)
                self.sess.add(dg)
            return None

        sd = self.new_doc(name_chain, obj, parent, level, ctype)
        self.docs[addr] = sd

        for attr_name in dir(obj):
            name_chain_ = name_chain + "." + attr_name
            try:
                sub_obj = getattr(obj, attr_name)
            except Exception as e:
                # dir() may list names whose lookup raises; skip just that attribute.
                logging.error(f"{name_chain_},\t{e}")
                continue
            if not checks(sub_obj): continue

            sub_addr = self.mid(sub_obj)
            if sub_addr in self.docs:
                known = self.docs[sub_addr]
                known.names = known.names +","+ name_chain_

                if self.load_source:
                    known.source = get_source(sub_obj)
                self.sess.add(known)
                # The edge runs from obj (the object being parsed) to the attribute,
                # so it is recorded whether or not obj itself has a parent.
                dg = docGraphModel(parent_id=sd.id, kid_id=known.id)
                self.sess.add(dg)
                continue

            # Names containing "__" are never descended into.
            if "__" in attr_name or level >= self.max_level:
                continue
            try:
                self.doc_parser(sub_obj, attr_name, level=level + 1, name_chain=name_chain_,
                                parent=sd)
            except Exception as e:
                logging.error(f"{name_chain_},\t{e}")

    def vote_name(self):
        """
        Give every recorded docModel its most frequent short name and the list
        of all distinct short names (``alias``, in first-seen order).
        """
        logging.info("start voting for most frequent name")
        for sd in self.sd_list:
            name_list = [n.split(".")[-1] for n in sd.names.split(",")]
            sd.name = most_frequent(name_list)
            # dict.fromkeys de-duplicates while keeping order, unlike set().
            sd.alias = ",".join(dict.fromkeys(name_list))
            self.sess.add(sd)
        logging.info("voting complete")

### Out facing function

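* lib: str, the name you assign to your target (also used as the name of the generated `.db` file); preferably starts with a letter and contains no special characters except underscores
* import_: bool, default True; when True, the target is imported using the name given in ```lib```
* obj: the object to parse when ```import_``` is False; could be class, module, function, variable, default None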

In [None]:
# export
def parse_lib(lib, import_ = True, obj = None):
    """
    Parse a target into a fresh SQLite database and return the resulting tour.

    :param lib: str, name of the target; also the name of the ``.db`` file created under ``basedir``
    :param import_: bool, when True the target is imported by the name ``lib``
        (dotted names resolve to the named submodule, not the top-level package)
    :param obj: the object to parse when ``import_`` is False
    :return: tuple ``(docTour, dataurl)`` where ``dataurl`` is the sqlite URL of the file
    """
    import importlib

    # Make sure the target directory exists before SQLite tries to create the file.
    os.makedirs(basedir, exist_ok=True)
    path = os.path.join(basedir, f"{lib}.db")
    dataurl = "sqlite:///" + path
    # Remove a previous database without going through a shell: portable, and
    # the file name is never interpreted as a shell command.
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    print(f"creating SQLite db:\t {dataurl}")
    eng = ce(dataurl)
    sess = Session(bind=eng)

    for m in [docModel,docGraphModel,inhGraphModel]:
        refresh_table(m, engine = eng)

    # import_module returns the module actually named by ``lib``; __import__("a.b") would return ``a``.
    root = importlib.import_module(lib) if import_ else obj
    dt = docTour(root, lib, sess)
    return dt,dataurl



## Experiment

Try an example

In [None]:
from torch import nn

dataurl will point to the sqlite file generated from this operation

In [None]:
# import_=False: parse the nn.GRU object passed in directly; "GRU" names the root and the .db file
dt,dataurl = parse_lib("GRU", False, nn.GRU)

creating SQLite db:	 sqlite:////Users/salvor/Downloads/GRU.db


#### Data sample

Sample of the data we extracted from our operation

In [None]:
# Five randomly chosen rows; no random_state is given, so the rows differ between runs
dt.df.sample(5)

Unnamed: 0,id,name,doc,path,names,level,source,ctype,alias
45,5056307712,_get_flat_weights_names,,/Users/salvor/anaconda3/lib/python3.7/site-pac...,"GRU._get_flat_weights._parameter_names_fn,GRU....",3,,function,"_parameter_names_fn,_get_flat_weights_names"
53,5056308576,permute_hidden,,/Users/salvor/anaconda3/lib/python3.7/site-pac...,"GRU.permute_hidden,GRU.permute_hidden",2,,function,permute_hidden
11,5055174272,add_module,Adds a child module to the current module.\n\n...,/Users/salvor/anaconda3/lib/python3.7/site-pac...,"RNNBase.add_module,GRU.add_module,GRU.add_module",2,,function,add_module
5,5055186272,_named_members,Helper method for yielding various names + mem...,/Users/salvor/anaconda3/lib/python3.7/site-pac...,"RNNBase._named_members,GRU._named_members,GRU....",2,,function,_named_members
13,5055186704,buffers,Returns an iterator over module buffers.\n\n ...,/Users/salvor/anaconda3/lib/python3.7/site-pac...,"RNNBase.buffers,GRU.buffers,GRU.buffers",2,,function,buffers
