In [1]:
import json
import ast
from collections import Counter

In [2]:
# Load the scraped repository data.  The file is opened in a `with` block so
# the handle is closed as soon as the JSON has been parsed.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable constant so the notebook can run elsewhere.
with open("/home/jpivarski/storage/data/GitHub-all-user-nonfork-uproot-awkward.json") as data_file:
    awkward_uproot_data = json.load(data_file)

In [3]:
class TrackedObject:
    """A dotted name (such as ``ak.Array``) that the analysis is following.

    Instances compare and hash by ``name`` only, so two objects built from the
    same dotted name are interchangeable as dict keys or set members.
    """

    def __init__(self, name):
        # name: the dotted path of this object, as a string
        self.name = name

    def __repr__(self):
        return f"TrackedObject({self.name!r})"

    def __str__(self):
        return self.name

    def __hash__(self):
        # The class is mixed into the hash so that a TrackedObject does not
        # hash like the bare tuple ``(name,)`` would.
        return hash((TrackedObject, self.name))

    def __eq__(self, other):
        return isinstance(other, TrackedObject) and self.name == other.name

    def __ne__(self, other):
        return not self.__eq__(other)

    def sub(self, name):
        """Return the TrackedObject for attribute ``name`` of this object."""
        return TrackedObject(f"{self.name}.{name}")

    def call(self, args, has_star=False, source=None):
        """Return a TrackedCall representing a call of this object.

        Parameters:
            args: the argument description handed to ``TrackedCall``.
            has_star: whether the call used ``*``/``**`` unpacking
                (defaults to ``False``).
            source: source text of the call, if known (defaults to ``None``).

        ``TrackedCall`` requires all four values; the previous two-argument
        construction always raised ``TypeError``.
        """
        return TrackedCall(self, args, has_star, source)

class TrackedCall:
    """One call of a tracked function, with whatever arguments were literal.

    Attributes:
        function: the callee (rendered with ``str()`` when printing).
        args: sequence of ``(name, value)`` pairs.  ``name`` is the decimal
            position for a positional argument or the keyword for a keyword
            argument; ``value`` is a JSON string, or ``None`` when the
            argument was not a literal.
        has_star: whether the call used ``*``/``**`` unpacking.
        source: source text of the call.

    Instances compare and hash by all four attributes, so ``args`` must be
    hashable (for example a tuple of tuples) if the call is to be hashed.
    """

    def __init__(self, function, args, has_star, source):
        self.function = function
        self.args = args
        self.has_star = has_star
        self.source = source

    def __repr__(self):
        return f"TrackedCall({self.function!r}, {self.args!r}, {self.has_star!r}, {self.source!r})"

    def __str__(self):
        rendered = []
        for name, value in self.args:
            # A keyword can never be all digits, so a digit-only name marks a
            # positional argument.  Comparing against the enumerate() position
            # instead breaks once a skipped *starred argument shifts the
            # indices, printing a positional as the bogus keyword "1=...".
            prefix = "" if name.isdigit() else f"{name}="
            if value is None:
                # non-literal argument: value unknown
                rendered.append(f"{prefix}?")
            else:
                rendered.append(f"{prefix}{json.loads(value)!r}")
        return f"{self.function!s}({', '.join(rendered)})"

    def __hash__(self):
        return hash((TrackedCall, self.function, self.args, self.has_star, self.source))

    def __eq__(self, other):
        return (
            isinstance(other, TrackedCall)
            and self.function == other.function
            and self.args == other.args
            and self.has_star == other.has_star
            and self.source == other.source
        )

def from_attribute(tree, scope):
    """Resolve a name or attribute chain to the TrackedObject it refers to.

    ``tree`` is an AST node and ``scope`` maps variable names to whatever they
    are currently bound to.  A bare name resolves only if ``scope`` holds a
    TrackedObject for it; an attribute access resolves if its base does, in
    which case the attribute name is appended with ``sub``.  Anything else
    yields ``None``.
    """
    if isinstance(tree, ast.Name):
        if tree.id in scope and isinstance(scope[tree.id], TrackedObject):
            return scope[tree.id]
        return None

    if isinstance(tree, ast.Attribute):
        base = from_attribute(tree.value, scope)
        if not isinstance(base, TrackedObject):
            return None
        return base.sub(tree.attr)

    return None

def literal(tree):
    """Return the JSON encoding of ``tree`` if it is a literal, else ``None``.

    The node is evaluated with ``ast.literal_eval`` and the result serialised
    with ``json.dumps``.  If either step fails with one of the exceptions
    listed below, the value is treated as unknown and ``None`` is returned.
    """
    not_a_literal = (ValueError, TypeError, SyntaxError, MemoryError, RecursionError)
    try:
        value = ast.literal_eval(tree)
        encoded = json.dumps(value)
    except not_a_literal:
        return None
    return encoded

def tracked(tree):
    """List every Uproot/Awkward reference and call found in a syntax tree.

    Parameters:
        tree: an ``ast`` node (normally the ``ast.Module`` from ``ast.parse``).

    Returns:
        A list, in traversal order, of ``TrackedObject`` (one per import and
        per attribute read that resolves to a tracked name) and ``TrackedCall``
        (one per call whose callee resolves to a tracked name).

    Raises:
        NotImplementedError: if the tree contains a value that is neither an
            AST node, a list, a plain scalar, ``None`` nor ``Ellipsis``.

    Package spellings are normalised: ``uproot``/``uproot3``/``uproot4``
    become ``uproot`` and ``awkward``/``awkward0``/``awkward1`` become ``ak``,
    for ``import X`` and ``from X import Y`` alike.  Variable bindings are
    followed through simple assignments; function and lambda bodies are
    analysed with a copy of the enclosing scope so that their rebinding does
    not leak outwards.
    """
    # Canonical root name for each recognised top-level package spelling.
    roots = {
        "uproot": "uproot",
        "uproot3": "uproot",
        "uproot4": "uproot",
        "awkward": "ak",
        "awkward0": "ak",
        "awkward1": "ak",
    }
    out = []

    def canonical(module):
        # Dotted module path with its root normalised, or None if untracked.
        head, *rest = module.split(".")
        if head not in roots:
            return None
        return ".".join([roots[head]] + rest)

    def bind(target, value, scope):
        # Record what a plain variable now refers to: a tracked object if the
        # right-hand side resolves to one, otherwise its literal (or None).
        if isinstance(target, ast.Name):
            obj = from_attribute(value, scope)
            if isinstance(obj, TrackedObject):
                scope[target.id] = obj
            else:
                scope[target.id] = literal(value)

    def bind_all(targets, value, scope):
        for target in targets:
            if isinstance(target, ast.Name):
                bind(target, value, scope)
            elif (
                isinstance(target, ast.Tuple)
                and isinstance(value, ast.Tuple)
                and len(target.elts) == len(value.elts)
            ):
                # element-wise `a, b = x, y`
                for t, v in zip(target.elts, value.elts):
                    bind(t, v, scope)

    def analyze(tree, scope):
        if isinstance(tree, ast.Import):
            for alias in tree.names:
                name = canonical(alias.name)
                if name is None:
                    continue
                obj = TrackedObject(name)
                out.append(obj)
                if alias.asname is not None:
                    # `import a.b as m` binds m to the submodule itself
                    scope[alias.asname] = obj
                else:
                    # `import a.b` binds only the top-level name `a`
                    top = alias.name.split(".")[0]
                    scope[top] = TrackedObject(roots[top])
        elif isinstance(tree, ast.ImportFrom):
            name = None if tree.module is None else canonical(tree.module)
            if name is not None:
                base = TrackedObject(name)
                for alias in tree.names:
                    obj = base.sub(alias.name)
                    out.append(obj)
                    scope[alias.name if alias.asname is None else alias.asname] = obj
        elif isinstance(tree, ast.Attribute):
            analyze(tree.value, scope)
            # only reads count as uses; stores and deletes are not reported
            if isinstance(tree.ctx, ast.Load):
                obj = from_attribute(tree, scope)
                if isinstance(obj, TrackedObject):
                    out.append(obj)
        elif isinstance(tree, ast.Call):
            analyze(tree.func, scope)
            analyze(tree.args, scope)
            analyze(tree.keywords, scope)
            obj = from_attribute(tree.func, scope)
            if isinstance(obj, TrackedObject):
                args = []
                # positional arguments are named by their index in the call;
                # *starred ones are skipped and flagged through has_star
                for i, x in enumerate(tree.args):
                    if not isinstance(x, ast.Starred):
                        args.append((str(i), literal(x)))
                # keywords are sorted so that argument order does not matter;
                # **unpacking has arg None and is flagged through has_star
                for x in sorted([x for x in tree.keywords if x.arg is not None], key=lambda x: x.arg):
                    args.append((x.arg, literal(x.value)))
                has_star = any(isinstance(x, ast.Starred) for x in tree.args) or any(x.arg is None for x in tree.keywords)
                out.append(TrackedCall(obj, tuple(args), has_star, ast.unparse(tree)))
        elif isinstance(tree, ast.Assign):
            analyze(tree.targets, scope)
            analyze(tree.value, scope)
            bind_all(tree.targets, tree.value, scope)
        elif isinstance(tree, ast.AnnAssign):
            analyze(tree.target, scope)
            analyze(tree.annotation, scope)
            analyze(tree.value, scope)
            # a bare annotation (`x: T`) assigns nothing, so keep the binding
            if tree.value is not None:
                bind_all([tree.target], tree.value, scope)
        elif isinstance(tree, ast.AugAssign):
            analyze(tree.target, scope)
            analyze(tree.value, scope)
            # after `x += ...` the variable no longer holds a known value
            if isinstance(tree.target, ast.Name) and tree.target.id in scope:
                del scope[tree.target.id]
        elif isinstance(tree, ast.Delete):
            analyze(tree.targets, scope)
            for target in tree.targets:
                if isinstance(target, ast.Name) and target.id in scope:
                    del scope[target.id]
        elif isinstance(tree, (ast.FunctionDef, ast.AsyncFunctionDef)):
            analyze(tree.args, scope)
            # the body gets its own copy so local rebinding stays local
            analyze(tree.body, dict(scope))
            analyze(tree.decorator_list, scope)
            analyze(tree.returns, scope)
        elif isinstance(tree, ast.Lambda):
            analyze(tree.args, scope)
            analyze(tree.body, dict(scope))
        elif isinstance(tree, ast.AST):
            for fieldname in tree._fields:
                analyze(getattr(tree, fieldname), scope)
        elif isinstance(tree, list):
            for x in tree:
                analyze(x, scope)
        elif isinstance(tree, (bool, int, float, complex, str, bytes)):
            pass
        elif tree is None or tree is Ellipsis:
            pass
        else:
            raise NotImplementedError(f"{type(tree) = } {repr(tree) = }")

    analyze(tree, {})

    return out

In [4]:
# For every file that parses as Python, print its name followed by one line
# per tracked call: the normalised call signature and the original source.
for repodata in awkward_uproot_data:
    for filedata in repodata["files"]:
        # avoid two vendored copies of Uproot's own codebase
        if filedata["name"].startswith("nickh2000/EMTF-DQM-Website/csctimingenv/"):
            continue

        try:
            syntax_tree = ast.parse(filedata["text"])
        except SyntaxError:
            # the text is not parseable as Python: nothing to report
            continue

        print(filedata["name"])
        for obj in tracked(syntax_tree):
            if not isinstance(obj, TrackedCall):
                continue
            print(f"    {str(obj):<50s} {obj.source}")

936-BCruz/Translating-Analyses-Into-Prototype-Analysis-Systems/Higgs to 4 Leptons Analysis.ipynb
    ak.num(?)                                          ak.num(good_mu)
    ak.argsort(?, ascending=False, axis=-1)            ak.argsort(mu4.pt, axis=-1, ascending=False)
    ak.sum(?, axis=-1)                                 ak.sum(f4_mu4_sorted[:, :, 'charge'], axis=-1)
    ak.combinations(?, 2)                              ak.combinations(f4c0_mu4_sorted, 2)
    ak.flatten(?)                                      ak.flatten(Za_2mu)
    ak.flatten(?)                                      ak.flatten(Zb_2mu)
    ak.num(?)                                          ak.num(good_e)
    ak.argsort(?, ascending=False, axis=-1)            ak.argsort(e4.pt, axis=-1, ascending=False)
    ak.sum(?, axis=-1)                                 ak.sum(f4_e4_sorted[:, :, 'charge'], axis=-1)
    ak.combinations(?, 2)                              ak.combinations(f4c0_e4_sorted, 2)
    ak.flatten(?)              