In [1]:
import inspect
import ast
from ast import NodeTransformer
import hashlib

from pathlib import Path

In [2]:
!pip install rich -q

In [3]:
from rich import print

In [4]:
from asttokens import ASTTokens

In [5]:
from tliba.etl import add_bernoulli_samples, combine_random_samples
from tliba import compute_moments
from tliba.random import draw_beta_samples

In [6]:
# Functions whose source is inspected in the cells below.
funcs = [add_bernoulli_samples, combine_random_samples, compute_moments, draw_beta_samples]

In [7]:
# Module object that defines each function, in the same order as `funcs`.
modules = list(map(inspect.getmodule, funcs))
modules

[<module 'tliba.etl' from '/Users/AF57BI/Documents/code/pycodehash/venv/lib/python3.9/site-packages/tliba/etl.py'>,
 <module 'tliba.etl' from '/Users/AF57BI/Documents/code/pycodehash/venv/lib/python3.9/site-packages/tliba/etl.py'>,
 <module 'tliba.summary' from '/Users/AF57BI/Documents/code/pycodehash/venv/lib/python3.9/site-packages/tliba/summary.py'>,
 <module 'tliba.random.rng' from '/Users/AF57BI/Documents/code/pycodehash/venv/lib/python3.9/site-packages/tliba/random/rng.py'>]

In [8]:
# For every module collect: dotted name, source path, top-level package name,
# source text, parsed AST and the asttokens wrapper built on that same AST.
mod_metadata = []
for mod in modules:
    mod_name = mod.__name__
    mod_path = Path(inspect.getsourcefile(mod))
    mod_src = mod_path.read_text()
    mod_tree = ast.parse(mod_src)
    mod_metadata.append(
        {
            "name": mod_name,
            "path": mod_path,
            # first component of the dotted module name
            "pkg": mod_name.split(".", 1)[0],
            "src": mod_src,
            "tree": mod_tree,
            # parse=False: reuse the tree parsed above instead of parsing again
            "tokens": ASTTokens(mod_src, parse=False, tree=mod_tree),
        }
    )

print(mod_metadata)

In [9]:
from importlib.util import find_spec

from rope.base.libutils import analyze_modules, path_to_resource
from rope.base.project import Project
from rope.refactor.occurrences import Finder
from rope.contrib.findit import Location,find_definition


In [10]:
# Build one rope project per distinct top-level package and reuse it for every
# module that belongs to that package. Creating and analyzing a fresh project
# for each entry repeats the same whole-package analysis when several functions
# live in the same package.
# `projects` stays index-aligned with `mod_metadata` (one entry per module record).
projects = []
projects_by_pkg = {}
for data in mod_metadata:
    pkg = data["pkg"]
    if pkg not in projects_by_pkg:
        spec = find_spec(pkg)
        # The first search location of the package is used as the project root.
        project = Project(projectroot=spec.submodule_search_locations[0])
        analyze_modules(project)
        projects_by_pkg[pkg] = project
    projects.append(projects_by_pkg[pkg])

print(projects)

In [11]:
# Look up the rope module object for each module record in its matching project.
pymods = [
    rope_project.get_module(record["name"])
    for record, rope_project in zip(mod_metadata, projects)
]

print(pymods)

In [12]:
# locs = {}
# for project, func, module in zip(projects, funcs, pymods):
#     finder = Finder(project, func.__name__)
#     mylocs = []
#     for occurrence in finder.find_occurrences(pymodule=module):
#         mylocs.append(Location(occurrence))
#     locs[func.__name__] = mylocs

# print(locs)

In [13]:
# Resolve each function by name inside its rope module and collect the
# `ast_node` of the resolved object.
nodes = []
for py_func, module in zip(funcs, pymods):
    rope_func = module.get_attribute(py_func.__name__).get_object()
    print(rope_func)
    nodes.append(rope_func.ast_node)

print(nodes)

In [14]:
def hash_string(s):
    """Return the hexadecimal SHA-256 digest of `s`.

    Args:
        s: Data to hash. A `str` is encoded as UTF-8 first; bytes-like
            objects are hashed as-is.

    Returns:
        The digest as a hexadecimal string.
    """
    if isinstance(s, str):
        # hashlib only accepts bytes-like input; passing text would raise TypeError.
        s = s.encode("utf-8")
    return hashlib.sha256(s).hexdigest()

In [15]:
def get_name(n):
    """Build a dotted display name for an expression node.

    Attribute chains are unwound from the outside in; the innermost
    expression becomes the first component:

    - `ast.Name`  -> its identifier
    - `ast.Call`  -> the literal "call"
    - otherwise   -> "?"

    Args:
        n: ast node to describe

    Returns:
        Dotted name, e.g. "a.b.c", "call.x" or "?"
    """
    attrs = []
    while isinstance(n, ast.Attribute):
        attrs.append(n.attr)
        n = n.value

    if isinstance(n, ast.Name):
        root = n.id
    elif isinstance(n, ast.Call):
        root = "call"
    else:
        root = "?"

    # attrs were collected outermost-first, so reverse them for display
    return ".".join([root, *reversed(attrs)])

In [16]:
def _get_text_range(node: ast.expr, tokens):
    """Get string offset from ast Node
    This is a workaround since `asttoken.get_text_range` needs to be an "EnhancedAST" node...

    Args:
        node: ast Node
        tokens: asttoken tokens

    Returns:
        Offset tuple. Returns 0,0 if not found
    """
    start = end = None
    for token in tokens:
        if token.start == (node.lineno, node.col_offset):
            start = token.startpos
        if token.end == (node.end_lineno, node.end_col_offset):
            end = token.endpos
    if start is not None and end is not None:
        return start, end

    return 0, 0

In [39]:
class ReplaceCall(NodeTransformer):
    """Visit every call in a function body and look up where the callee is defined.

    For each `ast.Call` the callee expression is located in the module source,
    resolved with rope's `find_definition`, and the resolved rope object is
    printed. Nodes are returned unchanged; replacing the callee name with a
    hash is still a TODO.

    Args:
        project: rope project used to resolve definitions
        module: rope module object of the module being visited
        metadata: dict with at least "src" (module source text) and
            "tokens" (object exposing a `.tokens` list) for that module
    """

    def __init__(self, project, module, metadata):
        # Placeholder for the hash-based replacement name (not used yet).
        self.hash_repr = None
        self.metadata = metadata
        self.project = project
        self.module = module

    def visit_Call(self, node: ast.Call):
        """Resolve and print the definition of the callee, then visit children."""
        print(node, get_name(node.func))

        source = self.metadata["src"]
        offset = _get_text_range(node.func, self.metadata["tokens"].tokens)
        name = source[offset[0]:offset[1]]

        definition = find_definition(self.project, source, offset[0])
        if definition is None:
            print(f"{name} not found")
        else:
            if isinstance(definition, Location) and definition.resource is None:
                # No resource: the definition lives in the source that was
                # passed to `find_definition`, i.e. the module being visited.
                # Read it from this instance's metadata, not from a
                # notebook-level global.
                def_source = source
                def_module = self.module
            else:
                # `resource.path` is relative to the project root, so opening it
                # directly depends on the current working directory. Let rope
                # read the file itself.
                def_source = definition.resource.read()
                def_module = self.project.get_pymodule(definition.resource)

            fname = def_source[definition.region[0]:definition.region[1]]
            print('definition name location', definition.region, fname)

            func = def_module.get_attribute(fname).get_object()
            # TODO: hash again!
            print(func)
            # TODO: compute the replacement name; prepend `h` to ensure the
            # syntax is valid (an identifier cannot start with a digit):
            # self.hash_repr = "h" + hash_string(ast.unparse(node).encode())
        self.generic_visit(node)
        return node

    def visit_Name(self, node: ast.Name):
        """Return the name unchanged (hash substitution is not enabled yet)."""
        # if self.hash_repr is not None:
        # node.id = self.hash_repr
        # self.hash_repr = None
        return node


In [41]:
# Print the source of each target function, then run the call visitor over it.
for node, project, metadata, module in zip(nodes, projects, mod_metadata, pymods):
    print('calls in')
    print(ast.unparse(node))

    ReplaceCall(project, module, metadata).visit(node)
    print("-" * 80)

# Note: aliases and library imports are not resolved by `rope`, e.g.
# - cases such as `rng.draw_....`
# - import `hello` as `normal_samples` ...


# TODO: naive baseline; hash all relevant modules (preprocessed)