Permalink
Browse files

callgraph categorizes classes and functions by .py source file.

We use the co_filename attribute of the __code__ object.

Problems:

- Classes don't have a __code__ object, so we don't know where they are
  unless they have methods.
- os.chdir() is a native function, which doesn't have a __code__ object,
  so we don't know where it lives either.  Python itself can't locate its
  source: try inspect.getsourcefile(os.chdir).
  • Loading branch information...
Andy Chu
Andy Chu committed Mar 15, 2018
1 parent eac70cd commit 88ca058ed95b4587695a4dfa3af96e77fbd045f5
Showing with 194 additions and 59 deletions.
  1. +1 −1 core/glob_.py
  2. +2 −2 core/process.py
  3. +1 −1 core/state.py
  4. +158 −40 opy/callgraph.py
  5. +31 −14 opy/callgraph_demo.py
  6. +1 −1 scripts/count.sh
View
@@ -162,7 +162,7 @@ def _GlobUnescape(s): # used by cmd_exec
return unescaped
class Globber:
class Globber(object):
def __init__(self, exec_opts):
self.exec_opts = exec_opts
View
@@ -546,7 +546,7 @@ def WhenDone(self, pid, status):
self.job_state.WhenDone(self.pipe_status[-1])
class JobState:
class JobState(object):
"""Global list of jobs, used by a few builtins."""
def __init__(self):
@@ -591,7 +591,7 @@ def WhenDone(self, pid):
# TODO: Update the list
class Waiter:
class Waiter(object):
"""A capability to wait for processes.
This must be a singleton (and is because Executor is a singleton).
View
@@ -31,7 +31,7 @@
e_die = util.e_die
class _ErrExit:
class _ErrExit(object):
"""Manages the errexit setting.
- The user can change it with builtin 'set' at any point in the code.
View
@@ -4,8 +4,10 @@
callgraph.py
"""
import sys
import collections
import dis
import os
import sys
import __builtin__ # For looking up names
import types
@@ -85,10 +87,14 @@ def Disassemble(co):
import sre_compile
def _GetAttr(module, name):
# Hack for bug in _fixup_range() ! (No longer in Python 3.6 head.)
if module is sre_compile and name == 'l':
return None
# traceback.py has a hasattr() test
if module is sys and name == 'tracebacklimit':
return None
try:
val = getattr(module, name)
@@ -99,37 +105,36 @@ def _GetAttr(module, name):
return val
def _Walk(func, module, seen, out):
def _Walk(obj, cls, module, syms):
"""
Discover statically what (globally-accessible) functions and classes are
used.
Something like this is OK:
def Adder(x):
def f(y):
return x + y
return f
# def Adder(x):
# def f(y):
# return x + y
# return f
Because we'll still have access to the inner code object. We probably won't
compile it though.
"""
id_ = id(func) # Prevent recursion like word.LeftMostSpanForPart
if id_ in seen:
if syms.Seen(obj):
return
seen.add(id_)
out.append(func)
#print(func)
if not hasattr(func, '__code__'): # Builtins don't have bytecode.
#print(obj)
if hasattr(obj, '__code__'): # Builtins don't have bytecode.
co = obj.__code__
syms.Add(obj, cls, co.co_filename, co.co_firstlineno)
else:
syms.Add(obj, cls, None, None)
return
#log('\tNAME %s', val.__code__.co_name)
#log('\tNAMES %s', val.__code__.co_names)
# Most functions and classes we call are globals!
#log('\t_Walk %s %s', func, module)
#log('\t_Walk %s %s', obj, module)
#log('\t%s', sorted(dir(module)))
# Have to account for foo.Bar(), which gives this sequence:
@@ -141,7 +146,7 @@ def f(y):
try:
last_val = None # value from previous LOAD_GLOBAL or LOAD_ATTR
g = Disassemble(func.__code__)
g = Disassemble(obj.__code__)
while True:
op, const, var = g.next()
@@ -161,29 +166,155 @@ def f(y):
if callable(val):
# Recursive call.
_Walk(val, sys.modules[val.__module__], seen, out)
_Walk(val, None, sys.modules[val.__module__], syms)
# If the value is a class, walk its methods. Note that we assume ALL
# methods are used. It's possible to narrow this down a bit and detect
# unused methods.
if isinstance(val, type):
cls = val
#log('type %s', val)
for name in dir(val):
# prevent error with __abstractmethods__ attribute
if name.startswith('__'):
#if name.startswith('__'):
if name == '__abstractmethods__':
continue
field_val = getattr(val, name)
#log('field_val %s', field_val)
if isinstance(field_val, types.MethodType):
func_obj = field_val.im_func
_Walk(func_obj, sys.modules[func_obj.__module__], seen, out)
_Walk(func_obj, cls, sys.modules[func_obj.__module__], syms)
last_val = val # Used on next iteration
except StopIteration:
pass
#log('\tDone _Walk %s %s', func, module)
#log('\tDone _Walk %s %s', obj, module)
class Class(object):
  """Descriptor for one discovered class and the methods found on it.

  Classes have no __code__ object, so the only location information available
  for a class comes from the methods registered with AddMethod().
  """

  def __init__(self, cls):
    self.cls = cls
    self.methods = []  # list of (method, path, line_num) tuples

  def Name(self):
    """Return the wrapped class's __name__."""
    return self.cls.__name__

  def Path(self):
    """Return the path recorded for the first method, or None if no methods."""
    if not self.methods:
      return None
    return self.methods[0][1]

  def AddMethod(self, m, path, line_num):
    """Record a method; Path() reports the path of the first one added."""
    # Just assume the method is in the same file as the class itself.
    self.methods.append((m, path, line_num))

  def Print(self, f=None):
    """Write the class header and its method names (sorted) to f.

    Args:
      f: file-like object with write(); defaults to the current sys.stdout.
    """
    if f is None:
      f = sys.stdout
    base_names = ' '.join(c.__name__ for c in self.cls.__bases__)
    f.write(' %s(%s)\n' % (self.cls.__name__, base_names))
    # Sort on the name only, so two methods with the same name never force a
    # comparison between function objects.
    for name in sorted(m.__name__ for (m, _, _) in self.methods):
      f.write(' %s\n' % name)


class Symbols(object):
  """A sink for discovered symbols.

  TODO: Need module namespaces.
  """

  def __init__(self):
    self.seen = set()  # id() of every callable passed to Seen()
    self.classes = {}  # integer id() -> Class
    self.functions = []  # list of (callable, path, line_num) tuples
    self.paths = {}  # path -> list of functions

  def Seen(self, c):
    """Return True if c was passed here before; otherwise remember it.

    Args:
      c: a callable.
    """
    id_ = id(c)
    if id_ in self.seen:
      return True
    self.seen.add(id_)
    return False

  def Add(self, obj, cls, path, line_num):
    """Record a function, class constructor, or method.

    Can also be native (C) or interpreted (Python with __code__ attribute.)

    Args:
      obj: the callable being recorded.
      cls: the class obj is a method of, or None.  The class must already have
        been added.
      path: source path, or None if unknown.  Normalized with
        os.path.normpath().
      line_num: first line number, or None if unknown.

    Returns:
      Always True.
    """
    if path is not None:
      path = os.path.normpath(path)

    if isinstance(obj, type):
      # NOTE: Python's classes don't have a __code__ object, which appears to
      # be an irregularity.  So we have to get location information from the
      # METHODS.
      assert not hasattr(obj, '__code__'), obj
      assert path is None
      assert line_num is None
      self.classes[id(obj)] = Class(obj)

    elif cls is not None:
      descriptor = self.classes[id(cls)]
      descriptor.AddMethod(obj, path, line_num)

    else:
      self.functions.append((obj, path, line_num))

    return True

  def Report(self, f=None):
    """Write all symbols, grouped by source path, to f.

    For each path (symbols with an unknown path are grouped under None and
    come first), writes the path, then the functions in the order they were
    added, then the classes sorted by name, then a blank line.

    Args:
      f: file-like object with write(); defaults to the current sys.stdout.
    """
    if f is None:
      f = sys.stdout

    # Now categorize into files.  We couldn't do that earlier because classes
    # don't know where they are located!

    # TODO:
    # - ASDL classes don't know where they are, because they don't have
    #   methods!
    # - Builtin functions like os.fork() don't know what module they're in
    #   either!

    srcs = collections.defaultdict(SourceFile)

    for func, path, line_num in self.functions:
      srcs[path].functions.append((func, line_num))

    for cls in self.classes.values():
      srcs[cls.Path()].classes.append(cls)

    # None (unknown location) can't be ordered against strings on Python 3, so
    # sort it explicitly to the front, which is also where Python 2 puts it.
    for path in sorted(srcs, key=lambda p: (p is not None, p or '')):
      src = srcs[path]

      f.write('%s\n' % path)

      for func, _ in src.functions:
        # Some callables (e.g. functools.partial) have no __name__.
        f.write(' %s\n' % getattr(func, '__name__', repr(func)))

      for c in sorted(src.classes, key=lambda c: c.Name()):
        c.Print(f)

      f.write('\n')


class SourceFile(object):
  """The classes and functions attributed to one source path."""

  def __init__(self):
    self.classes = []  # list of Class
    self.functions = []  # list of (callable, line_num) tuples
def Walk(main, modules):
@@ -206,26 +337,13 @@ def Walk(main, modules):
Returns:
TODO: callgraph? Flat dict of all functions called? Or edges?
"""
out = []
seen = set() # Set of id() values
_Walk(main, modules['__main__'], seen, out)
print('---')
for o in out:
print(o)
def main(argv):
from core import util
out = []
seen = set()
#_Walk(util.log, util, out)
_Walk(util.ShowAppVersion, util, seen, out)
syms = Symbols()
_Walk(main, None, modules['__main__'], syms)
#_Walk(util.log, sys.modules['core.util'], out)
print('---')
for o in out:
print(o)
# TODO:
# - co_consts should be unified? So we know how big the const pool is.
# - organize callables by CLASSES
# - I want a two level hierarchy
syms.Report()
if __name__ == '__main__':
main(sys.argv)
View
@@ -6,15 +6,33 @@
import sys
from opy import callgraph
_private = '_PRIVATE'
private = 'PRIVATE'
def g():
def f():
sys.settrace(sys.settrace) # Try passing a type to a type.
def main(argv):
def h():
import dis
dis.dis(f)
from core import util
out = []
seen = set()
#_Walk(util.log, util, out)
callgraph._Walk(util.ShowAppVersion, util, seen, out)
#_Walk(util.log, sys.modules['core.util'], out)
print('---')
for o in out:
print(o)
def g(argv):
print(dir(sys))
g()
@@ -23,16 +41,15 @@ def main(argv):
print(_private)
def main(argv):
callgraph.Walk(g, sys.modules)
h()
if __name__ == '__main__':
import dis
dis.dis(g)
if 1:
from opy import callgraph
callgraph.Walk(main, sys.modules)
else:
try:
main(sys.argv)
except RuntimeError as e:
print >>sys.stderr, 'FATAL: %s' % e
sys.exit(1)
try:
main(sys.argv)
except RuntimeError as e:
print >>sys.stderr, 'FATAL: %s' % e
sys.exit(1)
View
@@ -201,7 +201,7 @@ top-level() {
}
py-symbols() {
CALLGRAPH=1 bin/oil.py | sort
CALLGRAPH=1 bin/oil.py
}
old-style-classes() {

0 comments on commit 88ca058

Please sign in to comment.