diff --git a/.gitignore b/.gitignore index 51cbe85..d1271d7 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,4 @@ coverage.xml # Sphinx documentation docs/_build/ +.DS_Store \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index 741b620..ee2bbb3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ global-include . *.py *.c *.h Makefile *.pyx -global-exclude optional_test_performance.py +global-exclude chumpy/optional_test_performance.py prune dist diff --git a/Makefile b/Makefile index ef44c4d..17f42db 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ tidy: test: clean qtest qtest: all - python -m unittest discover + python -m unittest discover -s chumpy coverage: clean qcov qcov: all diff --git a/README.md b/README.md index a3371e1..7670495 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,19 @@ chumpy ====== + +Chumpy is a Python-based framework designed to handle the auto-differentiation problem, +which is to evaluate an expression and its derivatives with respect to its inputs, with +the use of the chain rule. Chumpy is intended to make construction and local +minimization of objectives easier. Specifically, it provides: + +- Easy problem construction by using Numpy’s application interface +- Easy access to derivatives via auto differentiation +- Easy local optimization methods (12 of them: most of which use the derivatives) + +Chumpy comes with its own demos, which can be seen by typing the following:: + +>> import chumpy +>> chumpy.demo() # prints out a list of possible demos + +Licensing is specified in the attached LICENSE.txt. + diff --git a/README.txt b/README.txt deleted file mode 100644 index 6c1b8d9..0000000 --- a/README.txt +++ /dev/null @@ -1,16 +0,0 @@ -Chumpy is a Python-based framework designed to handle the auto-differentiation problem, -which is to evalute an expression and its derivatives with respect to its inputs, with -the use of the chain rule. Chumpy is intended to make construction and local -minimization of objectives easier. Specifically, it provides: - -- Easy problem construction by using Numpy’s application interface -- Easy access to derivatives via auto differentiation -- Easy local optimization methods (12 of them: most of which use the derivatives) - -Chumpy comes with its own demos, which can be seen by typing the following:: - ->> import chumpy ->> chumpy.demo() # prints out a list of possible demos - -Licensing is specified in the attached LICENSE.txt. -
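As a quick orientation to what the new README promises, a minimal sketch of chumpy's auto-differentiation workflow (standard public API; the variables x and y are illustrative):

    import chumpy as ch

    x = ch.array([1., 2., 3.])   # a differentiable array
    y = ch.sum(x ** 2)           # an objective built with numpy-like ops

    print(y.r)                   # evaluate the expression: 14.0
    print(y.dr_wrt(x))           # Jacobian of y wrt x: [[ 2.  4.  6.]]

dr_wrt walks the expression graph and applies the chain rule; the ch.py changes below instrument exactly that machinery with timing, cache bookkeeping, and an optional profiler.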
diff --git a/__init__.py b/chumpy/__init__.py similarity index 98% rename from __init__.py rename to chumpy/__init__.py index f090eb6..3adc38d 100644 --- a/__init__.py +++ b/chumpy/__init__.py @@ -6,6 +6,7 @@ import testing from version import version as __version__ + from numpy import bool, int, float, complex, object, unicode, str, nan, inf def test(): diff --git a/api_compatibility.py b/chumpy/api_compatibility.py similarity index 100% rename from api_compatibility.py rename to chumpy/api_compatibility.py diff --git a/ch.py b/chumpy/ch.py similarity index 70% rename from ch.py rename to chumpy/ch.py index d3b429f..f346e40 100755 --- a/ch.py +++ b/chumpy/ch.py @@ -9,7 +9,7 @@ __all__ = ['Ch', 'depends_on', 'MatVecMult', 'ChHandle', 'ChLambda'] -import sys +import os, sys, time import inspect import scipy.sparse as sp import numpy as np @@ -18,11 +18,22 @@ import copy as external_copy from functools import wraps from scipy.sparse.linalg.interface import LinearOperator +import utils from utils import row, col import collections from copy import deepcopy +from utils import timer +# Turn this on if you want the profiler injected +DEBUG = False +# Turn this on to make optimizations very chatty for debugging +VERBOSE = False +def pif(msg): + # print-if-verbose. + if DEBUG or VERBOSE: + sys.stdout.write(msg + '\n') + _props_for_dict = weakref.WeakKeyDictionary() def _props_for(cls): if cls not in _props_for_dict: @@ -61,9 +72,13 @@ class Ch(object): terms = [] dterms = ['x'] __array_priority__ = 2.0 - _cached_parms = {} _setup_terms = {} + _default_kwargs = {'make_dense' : False, 'make_sparse' : False} + _status = "undefined" + + called_dr_wrt = False + profiler = None ######################################################## # Construction @@ -82,6 +97,15 @@ def __new__(cls, *args, **kwargs): object.__setattr__(result, '_itr', None) object.__setattr__(result, '_parents', weakref.WeakKeyDictionary()) object.__setattr__(result, '_cache', {'r': None, 'drs': weakref.WeakKeyDictionary()}) + + if DEBUG: + object.__setattr__(result, '_cache_info', {}) + object.__setattr__(result, '_status', 'new') + + for name, default_val in cls._default_kwargs.items(): + object.__setattr__(result, '_%s' % name, kwargs.get(name, default_val)) + if name in kwargs: + del kwargs[name] # Set up storage that allows @depends_on to work #props = [p for p in inspect.getmembers(cls, lambda x : isinstance(x, property)) if hasattr(p[1].fget, 'deps')] @@ -153,6 +177,10 @@ def setup_terms(cls): ######################################################## # Identifiers + @property + def short_name(self): + return self.label if hasattr(self, 'label') else self.__class__.__name__ + @property def sid(self): """Semantic id.""" @@ -278,11 +306,8 @@ def _compute_dr_wrt_sliced(self, wrt): return None wrt._call_on_changed() - - try: - jac = wrt.compute_dr_wrt(inner).T - except Exception as e: - import pdb; pdb.set_trace() + + jac = wrt.compute_dr_wrt(inner).T return self._superdot(result, jac) @@ -408,8 +433,24 @@ def clear_cache(self, itr=None): todo.append(parent) done.add(id(next)) return nodes_visited - - + + + def clear_cache_wrt(self, wrt, itr=None): + if wrt in self._cache['drs']: + self._cache['drs'][wrt] = None + + if hasattr(self, 'dr_cached') and wrt in self.dr_cached: + self.dr_cached[wrt] = None + + if itr is None or itr != self._itr: + for parent, parent_dict in self._parents.items(): + if wrt in parent._cache['drs'] or (hasattr(parent, 'dr_cached') and wrt in
parent.dr_cached): + parent.clear_cache_wrt(wrt=wrt, itr=itr) + object.__setattr__(parent, '_dirty_vars', parent._dirty_vars.union(parent_dict['varnames'])) + parent._invalidate_cacheprop_names(parent_dict['varnames']) + + object.__setattr__(self, '_itr', itr) + def replace(self, old, new): if (hasattr(old, 'dterms') != hasattr(new, 'dterms')): raise Exception('Either "old" and "new" must both be "Ch", or they must both be neither.') @@ -541,6 +582,9 @@ def _call_on_changed(self): if hasattr(self, 'is_valid'): validity, msg = self.is_valid() assert validity, msg + if hasattr(self, '_status'): + self._status = 'new' + if len(self._dirty_vars) > 0: self.on_changed(self._dirty_vars) object.__setattr__(self, '_dirty_vars', set()) @@ -555,8 +599,8 @@ def r(self): return self._cache['rview'] + def _superdot(self, lhs, rhs, profiler=None): - def _superdot(self, lhs, rhs): try: if lhs is None: return None @@ -579,17 +623,26 @@ def _superdot(self, lhs, rhs): if sp.issparse(rhs): return LinearOperator((lhs.shape[0], rhs.shape[1]), lambda x : lhs.dot(rhs.dot(x))) else: + # TODO: ????????????? + # return lhs.matmat(rhs) return lhs.dot(rhs) # TODO: Figure out how/whether to do this. - #lhs, rhs = utils.convert_inputs_to_sparse_if_possible(lhs, rhs) + tm_maybe_sparse = timer() + lhs, rhs = utils.convert_inputs_to_sparse_if_necessary(lhs, rhs) + if tm_maybe_sparse() > 0.1: + pif('convert_inputs_to_sparse_if_necessary in {}sec'.format(tm_maybe_sparse())) if not sp.issparse(lhs) and sp.issparse(rhs): return rhs.T.dot(lhs.T).T - return lhs.dot(rhs) - except: - import pdb; pdb.set_trace() + except Exception as e: + import sys, traceback + traceback.print_exc(file=sys.stdout) + if DEBUG: + import pdb; pdb.post_mortem() + else: + raise def lmult_wrt(self, lhs, wrt): if lhs is None: @@ -610,7 +663,7 @@ def lmult_wrt(self, lhs, wrt): if hasattr(p, 'dterms') and p is not wrt and p.is_dr_wrt(wrt): if not isinstance(p, Ch): print 'BROKEN!' 
- import pdb; pdb.set_trace() + raise Exception('Broken: should be a Ch object') indirect_dr = p.lmult_wrt(self._superdot(lhs, self._compute_dr_wrt_sliced(p)), wrt) if indirect_dr is not None: @@ -673,21 +726,32 @@ def compute_rop(self, wrt, rhs): return self._superdot(dr, rhs) - def dr_wrt(self, wrt, reverse_mode=False): + def dr_wrt(self, wrt, reverse_mode=False, profiler=None): + tm_dr_wrt = timer() + self.called_dr_wrt = True self._call_on_changed() - drs = [] + drs = [] if wrt in self._cache['drs']: + if DEBUG: + if wrt not in self._cache_info: + self._cache_info[wrt] = 0 + self._cache_info[wrt] += 1 + self._status = 'cached' return self._cache['drs'][wrt] direct_dr = self._compute_dr_wrt_sliced(wrt) if direct_dr is not None: - drs.append(direct_dr) + drs.append(direct_dr) + if DEBUG: + self._status = 'pending' + propnames = set(_props_for(self.__class__)) for k in set(self.dterms).intersection(propnames.union(set(self.__dict__.keys()))): + p = getattr(self, k) if hasattr(p, 'dterms') and p is not wrt: @@ -697,12 +761,16 @@ def dr_wrt(self, wrt, reverse_mode=False): if reverse_mode: lhs = self._compute_dr_wrt_sliced(p) if isinstance(lhs, LinearOperator): + tm_dr_wrt.pause() dr2 = p.dr_wrt(wrt) + tm_dr_wrt.resume() indirect_dr = lhs.matmat(dr2) if dr2 != None else None else: indirect_dr = p.lmult_wrt(lhs, wrt) else: # forward mode - dr2 = p.dr_wrt(wrt) + tm_dr_wrt.pause() + dr2 = p.dr_wrt(wrt, profiler=profiler) + tm_dr_wrt.resume() if dr2 is not None: indirect_dr = self.compute_rop(p, rhs=dr2) @@ -711,38 +779,58 @@ if len(drs)==0: result = None - elif len(drs)==1: result = drs[0] - else: + # TODO: ???????? + # result = np.sum(x for x in drs) if not np.any([isinstance(a, LinearOperator) for a in drs]): result = reduce(lambda x, y: x+y, drs) else: result = LinearOperator(drs[0].shape, lambda x : reduce(lambda a, b: a.dot(x)+b.dot(x),drs)) # TODO: figure out how/whether to do this. - # if result is not None and not sp.issparse(result): - # nonzero = np.count_nonzero(result) - # if nonzero > 0 and hasattr(result, 'size') and result.size / nonzero >= 10.0: - # #import pdb; pdb.set_trace() - # result = sp.csc_matrix(result) - - + if result is not None and not sp.issparse(result): + tm_nonzero = timer() + nonzero = np.count_nonzero(result) + if tm_nonzero() > 0.1: + pif('count_nonzero in {}sec'.format(tm_nonzero())) + if nonzero == 0 or hasattr(result, 'size') and result.size / float(nonzero) >= 10.0: + tm_convert_to_sparse = timer() + result = sp.csc_matrix(result) + import gc + gc.collect() + pif('converting result to sparse in {}sec'.format(tm_convert_to_sparse())) + if (result is not None) and (not sp.issparse(result)) and (not isinstance(result, LinearOperator)): result = np.atleast_2d(result) - + # When the number of parents is one, it indicates that # caching this is probably not useful because not # more than one parent will likely ask for this same # thing again in the same iteration of an optimization. # + # When the number of parents is zero, this is the top + # level object and should be cached; when it's > 1 + # cache the combinations of the children. + # # If we *always* filled in the cache, it would require # more memory but would occasionally save a little cpu, # on average.
if len(self._parents.keys()) != 1: self._cache['drs'][wrt] = result + if DEBUG: + self._status = 'done' + + if getattr(self, '_make_dense', False) and sp.issparse(result): + result = result.todense() + if getattr(self, '_make_sparse', False) and not sp.issparse(result): + result = sp.csc_matrix(result) + + if tm_dr_wrt() > 0.1: + pif('dx of {} wrt {} in {}sec, sparse: {}'.format(self.short_name, wrt.short_name, tm_dr_wrt(), sp.issparse(result))) + return result @@ -754,11 +842,172 @@ def __call__(self, **kwargs): ######################################################## # Visualization + @property + def reset_flag(self): + """ + Used as fn in loop_children_do + """ + return lambda x: setattr(x, 'called_dr_wrt', False) + + def loop_children_do(self, fn): + fn(self) + for dterm in self.dterms: + if hasattr(self, dterm): + dtval = getattr(self, dterm) + if hasattr(dtval, 'dterms') or hasattr(dtval, 'terms'): + if hasattr(dtval, 'loop_children_do'): + dtval.loop_children_do(fn) + + + def show_tree_cache(self, label, current_node=None): + ''' + Show the tree and cache info, with colors representing _status. + Optionally accept a current_node arg to highlight the node we are currently in. + ''' + import os + import tempfile + import subprocess + + assert DEBUG, "Please use dr tree visualization functions in debug mode" + + cache_path = os.path.abspath('profiles') + def string_for(self, my_name): + + color_mapping = {'new' : 'grey', 'pending':'red', 'cached':'yellow', 'done': 'green'} + if hasattr(self, 'label'): + my_name = self.label + my_name = '%s (%s)' % (my_name, str(self.__class__.__name__)) + result = [] + if not hasattr(self, 'dterms'): + return result + for dterm in self.dterms: + if hasattr(self, dterm): + dtval = getattr(self, dterm) + if hasattr(dtval, 'dterms') or hasattr(dtval, 'terms'): + child_label = getattr(dtval, 'label') if hasattr(dtval, 'label') else dterm + child_label = '%s (%s)' % (child_label, str(dtval.__class__.__name__)) + src = 'aaa%d' % (id(self)) + dst = 'aaa%d' % (id(dtval)) + + s = '' + color = color_mapping[dtval._status] if hasattr(dtval, '_status') else 'grey' + if dtval == current_node: + color = 'blue' + if isinstance(dtval, reordering.Concatenate) and len(dtval.dr_cached) > 0: + s = 'dr_cached\n' + for k, v in dtval.dr_cached.iteritems(): + if v is not None: + issparse = sp.issparse(v) + size = v.size + if issparse: + size = v.shape[0] * v.shape[1] + nonzero = len(v.data) + else: + nonzero = np.count_nonzero(v) + s += '\nsparse: %s\nsize: %d\nnonzero: %d\n' % (issparse, size, nonzero) + # if dtval.called_dr_wrt: + # # dtval.called_dr_wrt = False + # color = 'brown3' + # else: + # color = 'azure1' + elif len(dtval._cache['drs']) > 0: + s = '_cache\n' + + for k, v in dtval._cache['drs'].iteritems(): + if v is not None: + issparse = sp.issparse(v) + size = v.size + if issparse: + size = v.shape[0] * v.shape[1] + nonzero = len(v.data) + else: + nonzero = np.count_nonzero(v) + + s += '\nsparse: %s\nsize: %d\nnonzero: %d\n' % (issparse, size, nonzero) + if hasattr(dtval, '_cache_info'): + s += '\ncache hit:%s\n' % dtval._cache_info[k] + # if hasattr(dtval,'called_dr_wrt') and dtval.called_dr_wrt: + # # dtval.called_dr_wrt = False + # color = 'brown3' + # else: + # color = 'azure1' + result += ['%s -> %s;' % (src, dst)] + # Do not overwrite src + #result += ['%s [label="%s"];' % (src, my_name)] + result += ['%s [label="%s\n%s\n", color=%s, style=filled];' % + (dst, child_label, s, color)] + result += string_for(getattr(self, dterm), dterm) + return result + + + dot_file_contents
= 'digraph G {\n%s\n}' % '\n'.join(list(set(string_for(self, 'root')))) + dot_file_name = os.path.join(cache_path, label) + png_file_name = os.path.join(cache_path, label+'.png') + with open(dot_file_name, 'w') as dot_file: + dot_file.write(dot_file_contents) + + # render the graph beside the dot source and open it + subprocess.call(['dot', '-Tpng', '-o', png_file_name, dot_file_name]) + + import webbrowser + webbrowser.open('file://' + png_file_name) + + self.loop_children_do(self.reset_flag) + + def show_tree_wrt(self, wrt): + import tempfile + import subprocess + + assert DEBUG, "Please use dr tree visualization functions in debug mode" + + def string_for(self, my_name, wrt): + if hasattr(self, 'label'): + my_name = self.label + my_name = '%s (%s)' % (my_name, str(self.__class__.__name__)) + result = [] + if not hasattr(self, 'dterms'): + return result + for dterm in self.dterms: + if hasattr(self, dterm): + dtval = getattr(self, dterm) + if hasattr(dtval, 'dterms') or hasattr(dtval, 'terms'): + child_label = getattr(dtval, 'label') if hasattr(dtval, 'label') else dterm + child_label = '%s (%s)' % (child_label, str(dtval.__class__.__name__)) + src = 'aaa%d' % (id(self)) + dst = 'aaa%d' % (id(dtval)) + result += ['%s -> %s;' % (src, dst)] + result += ['%s [label="%s"];' % (src, my_name)] + if wrt in dtval._cache['drs'] and dtval._cache['drs'][wrt] is not None: + issparse = sp.issparse(dtval._cache['drs'][wrt]) + size = dtval._cache['drs'][wrt].size + nonzero = np.count_nonzero(dtval._cache['drs'][wrt]) + result += ['%s [label="%s\n is_sparse: %s\nsize: %d\nnonzero: %d"];' % + (dst, child_label, issparse, size, + nonzero)] + else: + result += ['%s [label="%s"];' % (dst, child_label)] + result += string_for(getattr(self, dterm), dterm, wrt) + return result + + + dot_file_contents = 'digraph G {\n%s\n}' % '\n'.join(list(set(string_for(self, 'root', wrt)))) + dot_file = tempfile.NamedTemporaryFile() + dot_file.write(dot_file_contents) + dot_file.flush() + png_file = tempfile.NamedTemporaryFile(suffix='.png', delete=False) + subprocess.call(['dot', '-Tpng', '-o', png_file.name, dot_file.name]) + import webbrowser + webbrowser.open('file://' + png_file.name) + def show_tree(self, cachelim=np.inf): """Cachelim is in Mb. For any cached jacobians above cachelim, they are also added to the graph. """ - import tempfile import subprocess + + assert DEBUG, "Please use dr tree visualization functions in debug mode" + def string_for(self, my_name): if hasattr(self, 'label'): my_name = self.label @@ -835,6 +1084,31 @@ def string_for(self, my_name): import webbrowser webbrowser.open('file://' + png_file.name) + + def tree_iterator(self, visited=None, path=None): + ''' + Generator function that traverses the dr tree starting from this node (self).
+ ''' + if visited is None: + visited = set() + + if self not in visited: + if path and isinstance(path, list): + path.append(self) + + visited.add(self) + yield self + + if not hasattr(self, 'dterms'): + return + + for dterm in self.dterms: + if hasattr(self, dterm): + child = getattr(self, dterm) + if hasattr(child, 'dterms') or hasattr(child, 'terms'): + for node in child.tree_iterator(visited): + yield node + def floor(self): return ch_ops.floor(self) @@ -997,6 +1271,39 @@ def compute_dr_wrt(self, wrt): if wrt is self._result: return 1 +# ChGroup is similar to ChLambda in that it's designed to expose the "internal" +# inputs of result but, unlike ChLambda, result is kept internal and called when +# compute_r and compute_dr_wrt are called to compute the relevant Jacobians. +# This provides a way of effectively applying the chain rule in a different order. +class ChGroup(Ch): + terms = ['result', 'args'] + dterms = [] + term_order = ['result', 'args'] + + def on_changed(self, which): + for argname in set(which).intersection(set(self.args.keys())): + if self.args[argname].x is not getattr(self, argname): + self.args[argname].x = getattr(self, argname) + + # right now the entries in args have to refer to terms/dterms of result, + # it would be better if they could be "internal" as well, but for now the idea + # is that result may itself be a ChLambda. + def __init__(self, result, args): + self.args = { argname: ChHandle(x=arg) for argname, arg in args.items() } + for argname, arg in self.args.items(): + setattr(result, argname, arg) + if result.is_dr_wrt(arg.x): + self.add_dterm(argname, arg.x) + else: + self.terms.append(argname) + setattr(self, argname, arg.x) + self._result = result + + def compute_r(self): + return self._result.r + + def compute_dr_wrt(self, wrt): + return self._result.dr_wrt(wrt) import ch_ops from ch_ops import * diff --git a/ch_ops.py b/chumpy/ch_ops.py similarity index 98% rename from ch_ops.py rename to chumpy/ch_ops.py index 03ade3f..deea353 100755 --- a/ch_ops.py +++ b/chumpy/ch_ops.py @@ -19,7 +19,7 @@ 'greater', 'greater_equal', 'less', 'less_equal', 'equal', 'not_equal', 'nonzero', 'ascontiguousarray', 'asfarray', 'arange', 'asarray', 'copy', 'cross', - 'shape', 'tensordot', 'sign'] + 'shape', 'sign'] __all__ += ['SumOfSquares', @@ -361,7 +361,7 @@ def compute_dr_wrt(self, wrt): idxs_presum = np.arange(self.x.size).reshape(self.x.shape) idxs_presum = np.rollaxis(idxs_presum, self.axis, 0) idxs_postsum = np.arange(self.r.size).reshape(self.r.shape) - tp = np.ones(idxs_presum.ndim) + tp = np.ones(idxs_presum.ndim, dtype=np.uint32) tp[0] = idxs_presum.shape[0] idxs_postsum = np.tile(idxs_postsum, tp) data = np.ones(idxs_postsum.size) / self.x.shape[self.axis] @@ -519,7 +519,7 @@ def broadcast_shape(a_shape, b_shape): #print 'aaa' + str(our_result) #print 'bbb' + str(numpy_result) if not np.array_equal(our_result, numpy_result): - import pdb; pdb.set_trace() + raise Exception('numpy result not equal to our result') assert(np.array_equal(our_result, numpy_result)) broadcast_shape_cache[uid] = tuple(our_result) @@ -726,10 +726,7 @@ class dot(ch.Ch): dterms = 'a', 'b' def compute_r(self): - try: - return self.a.r.dot(self.b.r) - except: - import pdb; pdb.set_trace() + return self.a.r.dot(self.b.r) def compute_d1(self): # To stay consistent with numpy, we must upgrade 1D arrays to 2D @@ -796,12 +793,14 @@ def nonzero(a): a = a.r return np.nonzero(a) +# Try to pull the code for tensordot in from numpy and reinterpret it using chumpy ops try: import inspect
exec(''.join(inspect.getsourcelines(np.tensordot)[0])) -except: pass + __all__ += ['tensordot'] +except: + pass - def main(): diff --git a/ch_random.py b/chumpy/ch_random.py similarity index 100% rename from ch_random.py rename to chumpy/ch_random.py diff --git a/extras.py b/chumpy/extras.py similarity index 100% rename from extras.py rename to chumpy/extras.py diff --git a/linalg.py b/chumpy/linalg.py similarity index 100% rename from linalg.py rename to chumpy/linalg.py diff --git a/logic.py b/chumpy/logic.py similarity index 100% rename from logic.py rename to chumpy/logic.py diff --git a/chumpy/monitor.py b/chumpy/monitor.py new file mode 100644 index 0000000..35d3f71 --- /dev/null +++ b/chumpy/monitor.py @@ -0,0 +1,149 @@ +''' +Logging service for tracking dr tree changes from the root objective, +recording every step that incrementally changes the dr tree + +''' +import os, sys, time +import json +import psutil + +import scipy.sparse as sp +import numpy as np +import reordering + +_TWO_20 = float(2 **20) + +''' +memory utils + +''' +def pdb_mem(): + from monitor import get_current_memory + mem = get_current_memory() + if mem > 7000: + import pdb;pdb.set_trace() + +def get_peak_mem(): + ''' + this returns peak memory use from when the process started until the moment it's called + ''' + import resource + rusage_denom = 1024. + if sys.platform == 'darwin': + # ... it seems that on OSX the output is in different units ... + rusage_denom = rusage_denom * rusage_denom + mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / rusage_denom + return mem + +def get_current_memory(): + p = psutil.Process(os.getpid()) + mem = p.memory_info()[0]/_TWO_20 + + return mem + +''' +Helper for Profiler +''' + +def build_cache_info(k, v, info_dict): + if v is not None: + issparse = sp.issparse(v) + size = v.size + if issparse: + nonzero = len(v.data) + else: + nonzero = np.count_nonzero(v) + info_dict[k.short_name] = { + 'sparse': issparse, + 'size' : str(size), + 'nonzero' : nonzero, + } + + +def cache_info(ch_node): + result = {} + if isinstance(ch_node, reordering.Concatenate) and hasattr(ch_node, 'dr_cached') and len(ch_node.dr_cached) > 0: + for k, v in ch_node.dr_cached.iteritems(): + build_cache_info(k, v, result) + elif len(ch_node._cache['drs']) > 0: + for k, v in ch_node._cache['drs'].iteritems(): + build_cache_info(k, v, result) + + return result + +class DrWrtProfiler(object): + base_path = os.path.abspath('profiles') + + def __init__(self, root, base_path=None): + self.root = root.obj + self.history = [] + + ts = time.time() + if base_path: + self.base_path = base_path + + self.path = os.path.join(self.base_path, 'profile_%s.json' % str(ts)) + self.root_path = os.path.join(self.base_path, 'root_%s.json' % str(ts)) + + + with open(self.root_path, 'w') as f: + json.dump(self.dump_tree(self.root), f, indent=4) + + def dump_tree(self, node): + if not hasattr(node, 'dterms'): + return [] + + node_dict = self.serialize_node(node, verbose=False) + if hasattr(node, 'visited') and node.visited: + node_dict.update({'indirect':True}) + return node_dict + + node.visited = True + children_list = [] + for dterm in node.dterms: + if hasattr(node, dterm): + child = getattr(node, dterm) + if hasattr(child, 'dterms') or hasattr(child, 'terms'): + children_list.append(self.dump_tree(child)) + node_dict.update({'children':children_list}) + return node_dict + + def serialize_node(self, ch_node, verbose=True): + node_id = id(ch_node) + name = ch_node.short_name + ts = time.time() + status = ch_node._status + mem = 
get_current_memory() + node_cache_info = cache_info(ch_node) + + rec = { + 'id': str(node_id), + 'indirect' : False, + } + if verbose: + rec.update({ + 'name':name, + 'ts' : ts, + 'status':status, + 'mem': mem, + 'cache': node_cache_info, + }) + return rec + + def show_tree(self, label): + ''' + show tree from the root node + ''' + self.root.show_tree_cache(label) + + def record(self, ch_node): + ''' + Incremental changes + ''' + rec = self.serialize_node(ch_node) + self.history.append(rec) + + def harvest(self): + print 'collecting and dumping to file %s' % self.path + with open(self.path, 'w') as f: + json.dump(self.history, f, indent=4) \ No newline at end of file diff --git a/chumpy/optimization.py b/chumpy/optimization.py new file mode 100755 index 0000000..c315198 --- /dev/null +++ b/chumpy/optimization.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python + +""" +Author(s): Matthew Loper + +See LICENCE.txt for licensing and contact information. +""" + +__all__ = ['minimize'] + +import numpy as np +import ch +import scipy.sparse as sp +import scipy.optimize + +from optimization_internal import minimize_dogleg + +#from memory_profiler import profile, memory_usage + +# def disable_cache_for_single_parent_node(node): +# if hasattr(node, '_parents') and len(node._parents.keys()) == 1: +# node.want_cache = False + + +# Nelder-Mead +# Powell +# CG +# BFGS +# Newton-CG +# Anneal +# L-BFGS-B +# TNC +# COBYLA +# SLSQP +# dogleg +# trust-ncg +def minimize(fun, x0, method='dogleg', bounds=None, constraints=(), tol=None, callback=None, options=None): + + if method == 'dogleg': + if options is None: options = {} + return minimize_dogleg(fun, free_variables=x0, on_step=callback, **options) + + if isinstance(fun, list) or isinstance(fun, tuple): + fun = ch.concatenate([f.ravel() for f in fun]) + if isinstance(fun, dict): + fun = ch.concatenate([f.ravel() for f in fun.values()]) + obj = fun + free_variables = x0 + + + from ch import SumOfSquares + + hessp = None + hess = None + if obj.size == 1: + obj_scalar = obj + else: + obj_scalar = SumOfSquares(obj) + + def hessp(vs, p,obj, obj_scalar, free_variables): + changevars(vs,obj,obj_scalar,free_variables) + if not hasattr(hessp, 'vs'): + hessp.vs = vs*0+1e16 + if np.max(np.abs(vs-hessp.vs)) > 0: + + J = ns_jacfunc(vs,obj,obj_scalar,free_variables) + hessp.J = J + hessp.H = 2. * J.T.dot(J) + hessp.vs = vs + return np.array(hessp.H.dot(p)).ravel() + #return 2*np.array(hessp.J.T.dot(hessp.J.dot(p))).ravel() + + if method.lower() != 'newton-cg': + def hess(vs, obj, obj_scalar, free_variables): + changevars(vs,obj,obj_scalar,free_variables) + if not hasattr(hessp, 'vs'): + hessp.vs = vs*0+1e16 + if np.max(np.abs(vs-hessp.vs)) > 0: + J = ns_jacfunc(vs,obj,obj_scalar,free_variables) + hessp.H = 2.
* J.T.dot(J) + return hessp.H + + def changevars(vs, obj, obj_scalar, free_variables): + cur = 0 + changed = False + for idx, freevar in enumerate(free_variables): + sz = freevar.r.size + newvals = vs[cur:cur+sz].copy().reshape(free_variables[idx].shape) + if np.max(np.abs(newvals-free_variables[idx]).ravel()) > 0: + free_variables[idx][:] = newvals + changed = True + + cur += sz + + methods_without_callback = ('anneal', 'powell', 'cobyla', 'slsqp') + if callback is not None and changed and method.lower() in methods_without_callback: + callback(None) + + return changed + + def residuals(vs,obj, obj_scalar, free_variables): + changevars(vs, obj, obj_scalar, free_variables) + residuals = obj_scalar.r.ravel()[0] + return residuals + + def scalar_jacfunc(vs,obj, obj_scalar, free_variables): + if not hasattr(scalar_jacfunc, 'vs'): + scalar_jacfunc.vs = vs*0+1e16 + if np.max(np.abs(vs-scalar_jacfunc.vs)) == 0: + return scalar_jacfunc.J + + changevars(vs, obj, obj_scalar, free_variables) + + if True: # faster, at least on some problems + result = np.concatenate([np.array(obj_scalar.lop(wrt, np.array([[1]]))).ravel() for wrt in free_variables]) + else: + jacs = [obj_scalar.dr_wrt(wrt) for wrt in free_variables] + for idx, jac in enumerate(jacs): + if sp.issparse(jac): + jacs[idx] = jacs[idx].todense() + result = np.concatenate([jac.ravel() for jac in jacs]) + + scalar_jacfunc.J = result + scalar_jacfunc.vs = vs + return result.ravel() + + def ns_jacfunc(vs,obj, obj_scalar, free_variables): + if not hasattr(ns_jacfunc, 'vs'): + ns_jacfunc.vs = vs*0+1e16 + if np.max(np.abs(vs-ns_jacfunc.vs)) == 0: + return ns_jacfunc.J + + changevars(vs, obj, obj_scalar, free_variables) + jacs = [obj.dr_wrt(wrt) for wrt in free_variables] + result = hstack(jacs) + + ns_jacfunc.J = result + ns_jacfunc.vs = vs + return result + + + x1 = scipy.optimize.minimize( + method=method, + fun=residuals, + callback=callback, + x0=np.concatenate([free_variable.r.ravel() for free_variable in free_variables]), + jac=scalar_jacfunc, + hessp=hessp, hess=hess, args=(obj, obj_scalar, free_variables), + bounds=bounds, constraints=constraints, tol=tol, options=options).x + + changevars(x1, obj, obj_scalar, free_variables) + return free_variables + + +def main(): + pass + + +if __name__ == '__main__': + main() + diff --git a/chumpy/optimization_internal.py b/chumpy/optimization_internal.py new file mode 100644 index 0000000..2102d3b --- /dev/null +++ b/chumpy/optimization_internal.py @@ -0,0 +1,455 @@ +import sys +import warnings +import numpy as np +import scipy.sparse as sp +import ch, utils +from ch import pif +from utils import timer + + +def clear_cache_single(node): + node._cache['drs'].clear() + if hasattr(node, 'dr_cached'): + node.dr_cached.clear() + +def vstack(x): + x = [a if not isinstance(a, sp.linalg.interface.LinearOperator) else a.dot(np.eye(a.shape[1])) for a in x] + return sp.vstack(x, format='csc') if any([sp.issparse(a) for a in x]) else np.vstack(x) +def hstack(x): + x = [a if not isinstance(a, sp.linalg.interface.LinearOperator) else a.dot(np.eye(a.shape[1])) for a in x] + return sp.hstack(x, format='csc') if any([sp.issparse(a) for a in x]) else np.hstack(x) + + +_giter = 0 +class ChInputsStacked(ch.Ch): + dterms = 'x', 'obj' + terms = 'free_variables' + + def compute_r(self): + if not hasattr(self, 'fevals'): + self.fevals = 0 + self.fevals += 1 + return self.obj.r.ravel() + + def dr_wrt(self, wrt, profiler=None): + ''' + Loop over free variables and delete cache for the whole tree after finishing each one + ''' + 
if wrt is self.x: + jacs = [] + for fvi, freevar in enumerate(self.free_variables): + tm = timer() + if isinstance(freevar, ch.Select): + new_jac = self.obj.dr_wrt(freevar.a, profiler=profiler) + try: + new_jac = new_jac[:, freevar.idxs] + except: + # non-csc sparse matrices may not support column-wise indexing + new_jac = new_jac.tocsc()[:, freevar.idxs] + else: + new_jac = self.obj.dr_wrt(freevar, profiler=profiler) + + pif('dx wrt {} in {}sec, sparse: {}'.format(freevar.short_name, tm(), sp.issparse(new_jac))) + + if self._make_dense and sp.issparse(new_jac): + new_jac = new_jac.todense() + if self._make_sparse and not sp.issparse(new_jac): + new_jac = sp.csc_matrix(new_jac) + + if new_jac is None: + raise Exception( + 'Objective has no derivative wrt free variable {}. ' + 'You should likely remove it.'.format(fvi)) + + jacs.append(new_jac) + tm = timer() + utils.dfs_do_func_on_graph(self.obj, clear_cache_single) + pif('dfs_do_func_on_graph in {}sec'.format(tm())) + tm = timer() + J = hstack(jacs) + pif('hstack in {}sec'.format(tm())) + return J + + def on_changed(self, which): + global _giter + _giter += 1 + if 'x' in which: + pos = 0 + for idx, freevar in enumerate(self.free_variables): + sz = freevar.r.size + rng = np.arange(pos, pos+sz) + if isinstance(self.free_variables[idx], ch.Select): + # Deal with nested selects + selects = [] + a = self.free_variables[idx] + while isinstance(a, ch.Select): + selects.append(a.idxs) + a = a.a + newv = a.x.copy() + idxs = selects.pop() + while len(selects) > 0: + idxs = idxs[selects.pop()] + newv.ravel()[idxs] = self.x.r.ravel()[rng] + a.__setattr__('x', newv, _giter) + elif isinstance(self.free_variables[idx].x, np.ndarray): + self.free_variables[idx].__setattr__('x', self.x.r[rng].copy().reshape(self.free_variables[idx].x.shape), _giter) + else: # a number + self.free_variables[idx].__setattr__('x', self.x.r[rng], _giter) + pos += sz + + @property + def J(self): + ''' + Compute Jacobian. Analyze dr graph first to disable unnecessary caching + ''' + result = self.dr_wrt(self.x, profiler=self.profiler).copy() + if self.profiler: + self.profiler.harvest() + return np.atleast_2d(result) if not sp.issparse(result) else result + + +def setup_sparse_solver(sparse_solver): + _solver_fns = { + 'cg': lambda A, x, M=None : sp.linalg.cg(A, x, M=M, tol=1e-10)[0], + 'spsolve': lambda A, x : sp.linalg.spsolve(A, x) + } + if callable(sparse_solver): + return sparse_solver + elif isinstance(sparse_solver, str) and sparse_solver in _solver_fns.keys(): + return _solver_fns[sparse_solver] + else: + raise Exception('sparse_solver argument must be either a string in the set (%s) or have the api of scipy.sparse.linalg.spsolve.' % ', '.join(_solver_fns.keys())) + + +def setup_objective(obj, free_variables, on_step=None, disp=True, make_dense=False): + ''' + obj here can be a list of ch objects or a dict of label: ch objects. Either way, the ch + objects will be merged into one objective using a ChInputsStacked. The labels are just used + for printing out values per objective with each iteration. 
If make_dense is True, the + resulting object will return a dense Jacobian + ''' + # Validate free variables + num_unique_ids = len(np.unique(np.array([id(freevar) for freevar in free_variables]))) + if num_unique_ids != len(free_variables): + raise Exception('The "free_variables" param contains duplicate variables.') + # Extract labels + labels = {} + if isinstance(obj, list) or isinstance(obj, tuple): + obj = ch.concatenate([f.ravel() for f in obj]) + elif isinstance(obj, dict): + labels = obj + obj = ch.concatenate([f.ravel() for f in obj.values()]) + # build objective + x = np.concatenate([freevar.r.ravel() for freevar in free_variables]) + obj = ChInputsStacked(obj=obj, free_variables=free_variables, x=x, make_dense=make_dense) + # build callback + def callback(): + if on_step is not None: + on_step(obj) + if disp: + report_line = ['%.2e' % (np.sum(obj.r**2),)] + for label, objective in sorted(labels.items(), key=lambda x: x[0]): + report_line.append('%s: %.2e' % (label, np.sum(objective.r**2))) + report_line = " | ".join(report_line) + '\n' + sys.stderr.write(report_line) + return obj, callback + + +class DoglegState(object): + ''' + Dogleg preserves a great deal of state from iteration to iteration. Many of the things + that we need to calculate are dependent only on this state (e.g. the various trust region + steps, the current jacobian and the A & g that depend on it, etc.). Holding the state and + the various methods based on that state here allows us to separate a lot of the jacobian + based calculation from the flow control of the optimization. + + There will be one instance of DoglegState per invocation of minimize_dogleg. + ''' + def __init__(self, delta, solve): + self.iteration = 0 + self._d_gn = None # gauss-newton + self._d_sd = None # steepest descent + self._d_dl = None # dogleg + self.J = None + self.A = None + self.g = None + self._p = None + self.delta = delta + self.solve = solve + self._r = None + self.rho = None + self.done = False + + @property + def p(self): + '''p is the current proposed input vector''' + return self._p + @p.setter + def p(self, val): + self._p = val.reshape((-1, 1)) + + # induce some certainty about what the shapes of the steps are + @property + def d_gn(self): + return self._d_gn + @d_gn.setter + def d_gn(self, val): + if val is not None: + val = val.reshape((-1, 1)) + self._d_gn = val + + @property + def d_sd(self): + return self._d_sd + @d_sd.setter + def d_sd(self, val): + if val is not None: + val = val.reshape((-1, 1)) + self._d_sd = val + + @property + def d_dl(self): + return self._d_dl + @d_dl.setter + def d_dl(self, val): + if val is not None: + val = val.reshape((-1, 1)) + self._d_dl = val + + @property + def step(self): + return self.d_dl.reshape((-1, 1)) + @property + def step_size(self): + return np.linalg.norm(self.d_dl) + + def start_iteration(self): + self.iteration += 1 + pif('beginning iteration %d' % (self.iteration,)) + self.d_sd = (np.linalg.norm(self.g)**2 / np.linalg.norm(self.J.dot(self.g))**2 * self.g).ravel() + self.d_gn = None + + @property + def r(self): + '''r is the residual at the current p''' + return self._r + @r.setter + def r(self, val): + self._r = val.copy().reshape((-1, 1)) + self.updateAg() + + def updateAg(self): + tm = timer() + pif('updating A and g...') + JT = self.J.T + self.A = JT.dot(self.J) + self.g = JT.dot(-self.r).reshape((-1, 1)) + pif('A and g updated in %.2fs' % tm()) + + def update_step(self): + # if the Cauchy point is outside the trust region, + # take that direction but only to the edge
of the trust region + if self.delta is not None and np.linalg.norm(self.d_sd) >= self.delta: + pif('PROGRESS: Using stunted cauchy') + self.d_dl = np.array(self.delta/np.linalg.norm(self.d_sd) * self.d_sd).ravel() + else: + if self.d_gn is None: + # We only need to compute this once per iteration + self.updateGN() + # if the gauss-newton solution is within the trust region, use it + if self.delta is None or np.linalg.norm(self.d_gn) <= self.delta: + pif('PROGRESS: Using gauss-newton solution') + self.d_dl = np.array(self.d_gn).ravel() + if self.delta is None: + self.delta = np.linalg.norm(self.d_gn) + else: # between cauchy step and gauss-newton step + pif('PROGRESS: between cauchy and gauss-newton') + # apply step + self.d_dl = self.d_sd + self.beta_multiplier * (self.d_gn - self.d_sd) + + @property + def beta_multiplier(self): + delta_sq = self.delta**2 + diff = self.d_gn - self.d_sd + sqnorm_sd = np.linalg.norm(self.d_sd)**2 + pnow = diff.T.dot(diff)*delta_sq + self.d_gn.T.dot(self.d_sd)**2 - np.linalg.norm(self.d_gn)**2 * sqnorm_sd + return float(delta_sq - sqnorm_sd) / float((diff).T.dot(self.d_sd) + np.sqrt(pnow)) + + def updateGN(self): + tm = timer() + if sp.issparse(self.A): + self.A.eliminate_zeros() + pif('sparse solve...sparsity infill is %.3f%% (hessian %dx%d)' % (100. * self.A.nnz / (self.A.shape[0] * self.A.shape[1]), self.A.shape[0], self.A.shape[1])) + if self.g.size > 1: + self.d_gn = self.solve(self.A, self.g).ravel() + if np.any(np.isnan(self.d_gn)) or np.any(np.isinf(self.d_gn)): + from scipy.sparse.linalg import lsqr + warnings.warn("sparse solve failed, falling back to lsqr") + self.d_gn = lsqr(self.A, self.g)[0].ravel() + else: + self.d_gn = np.atleast_1d(self.g.ravel()[0]/self.A[0,0]) + pif('sparse solve...done in %.2fs' % tm()) + else: + pif('dense solve...') + try: + self.d_gn = np.linalg.solve(self.A, self.g).ravel() + except Exception: + warnings.warn("dense solve failed, falling back to lsqr") + self.d_gn = np.linalg.lstsq(self.A, self.g)[0].ravel() + pif('dense solve...done in %.2fs' % tm()) + + def updateJ(self, obj): + tm = timer() + pif('computing Jacobian...') + self.J = obj.J + if self.J is None: + raise Exception("Computing Jacobian failed!") + if sp.issparse(self.J): + tm2 = timer() + self.J = self.J.tocsr() + pif('converted to csr in {}secs'.format(tm2())) + assert(self.J.nnz > 0) + elif ch.VERBOSE: + nonzero = np.count_nonzero(self.J) + pif('Jacobian dense with sparsity %.3f' % (nonzero/self.J.size)) + pif('Jacobian (%dx%d) computed in %.2fs' % (self.J.shape[0], self.J.shape[1], tm())) + if self.J.shape[1] != self.p.size: + raise Exception('Jacobian size mismatch with objective input') + return self.J + + class Trial(object): + ''' + Inside each iteration of dogleg we propose a step and check to see if it's actually + an improvement before we accept it. This class encapsulates that trial and the + testing to see if it is actually an improvement. + + There will be one instance of Trial per iteration in dogleg. + ''' + def __init__(self, proposed_r, state): + self.r = proposed_r + self.state = state + # rho is the ratio of... + # (improvement in SSE) / (predicted improvement in SSE) + self.rho = np.linalg.norm(state.r)**2 - np.linalg.norm(proposed_r)**2 + if self.rho > 0: + with warnings.catch_warnings(): + warnings.filterwarnings('ignore',category=RuntimeWarning) + predicted_improvement = 2. 
* state.g.T.dot(state.d_dl) - state.d_dl.T.dot(state.A.dot(state.d_dl)) + self.rho /= predicted_improvement + + @property + def is_improvement(self): + return self.rho > 0 + + @property + def improvement(self): + return (np.linalg.norm(self.state.r)**2 - np.linalg.norm(self.r)**2) / np.linalg.norm(self.state.r)**2 + + def trial_r(self, proposed_r): + return self.Trial(proposed_r, self) + + def updateRadius(self, rho, lb=.05, ub=.9): + if rho > ub: + self.delta = max(self.delta, 2.5*np.linalg.norm(self.d_dl)) + elif rho < lb: + self.delta *= .25 + + +def minimize_dogleg(obj, free_variables, on_step=None, + maxiter=200, max_fevals=np.inf, sparse_solver='spsolve', + disp=True, e_1=1e-15, e_2=1e-15, e_3=0., delta_0=None, + treat_as_dense=False): + """Nonlinear optimization using Powell's dogleg method. + See Lourakis et al, 2005, ICCV '05, "Is Levenberg-Marquardt the + Most Efficient Optimization for Implementing Bundle Adjustment?": + http://www.ics.forth.gr/cvrl/publications/conferences/0201-P0401-lourakis-levenberg.pdf + + e_N are stopping conditions: + e_1 is the gradient magnitude threshold + e_2 is the step size magnitude threshold + e_3 is the improvement threshold (as a ratio; 0.1 means it must improve by 10% at each step) + + maxiter and max_fevals are also stopping conditions. Note that they're not quite the same, + as an iteration may evaluate the function more than once. + + sparse_solver is the solver to use to calculate the Gauss-Newton step in the common case + that the Jacobian is sparse. It can be 'spsolve' (in which case scipy.sparse.linalg.spsolve + will be used), 'cg' (in which case scipy.sparse.linalg.cg will be used), or any callable + that matches the api of scipy.sparse.linalg.spsolve to solve `A x = b` for x where A is sparse. + + cg uses a conjugate gradient method and will be faster if A is sparse but x is dense. + spsolve will be faster if x is also sparse. + + delta_0 defines the initial trust region. Generally speaking, if this is set too low then + the optimization will never really go anywhere (too small a trust region to make any real + progress before running out of iterations) and if it's set too high then the optimization + will diverge immediately and go wild (so large a trust region that the initial step + overshoots so far that it can't recover). If it's left as None, it will be automatically + estimated on the first iteration; it's always updated at each iteration, so this is treated + only as an initialization.
+ + treat_as_dense explicitly converts all Jacobians of obj to dense matrices + """ + + + solve = setup_sparse_solver(sparse_solver) + obj, callback = setup_objective(obj, free_variables, on_step=on_step, disp=disp, + make_dense=treat_as_dense) + + state = DoglegState(delta=delta_0, solve=solve) + state.p = obj.x.r + + # inject profiler if in DEBUG mode + if ch.DEBUG: + from .monitor import DrWrtProfiler + obj.profiler = DrWrtProfiler(obj) + + callback() + state.updateJ(obj) + state.r = obj.r + + def stop(msg): + if not state.done: + pif(msg) + state.done = True + + if np.linalg.norm(state.g, np.inf) < e_1: + stop('stopping because norm(g, np.inf) < %.2e' % e_1) + while not state.done: + state.start_iteration() + while True: + state.update_step() + if state.step_size <= e_2 * np.linalg.norm(state.p): + stop('stopping because of small step size (norm_dl < %.2e)' % (e_2 * np.linalg.norm(state.p))) + else: + tm = timer() + obj.x = state.p + state.step + trial = state.trial_r(obj.r) + pif('Residuals computed in %.2fs' % tm()) + # if the objective function improved, update input parameter estimate. + # Note that the obj.x already has the new parms, + # and we should not set them again to the same (or we'll bust the cache) + if trial.is_improvement: + state.p = state.p + state.step + callback() + if e_3 > 0. and trial.improvement < e_3: + stop('stopping because improvement < %.1e%%' % (100*e_3)) + else: + state.updateJ(obj) + state.r = trial.r + if np.linalg.norm(state.g, np.inf) < e_1: + stop('stopping because norm(g, np.inf) < %.2e' % e_1) + else: # Put the old parms back + obj.x = ch.Ch(state.p) + obj.on_changed('x') # copies from flat vector to free variables + # update our trust region + state.updateRadius(trial.rho) + if state.delta <= e_2*np.linalg.norm(state.p): + stop('stopping because trust region is too small') + if state.done or trial.is_improvement or (obj.fevals >= max_fevals): + break + if state.iteration >= maxiter: + stop('stopping because max number of user-specified iterations (%d) has been met' % maxiter) + elif obj.fevals >= max_fevals: + stop('stopping because max number of user-specified func evals (%d) has been met' % max_fevals) + return obj.free_variables
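To make the new flow concrete, here is a sketch of driving the internal dogleg entry point directly; the toy residuals are illustrative, and ordinary callers would go through minimize(..., method='dogleg') in chumpy/optimization.py instead:

    import numpy as np
    import chumpy as ch
    from chumpy.optimization_internal import minimize_dogleg

    x = ch.Ch(np.array([3., 1.]))
    # Two toy residual terms; the objective is the sum of their squares.
    obj = {'e1': x[0] ** 2 + x[1] ** 2 - 1., 'e2': x[0] - x[1]}
    minimize_dogleg(obj, free_variables=[x], maxiter=50,
                    sparse_solver='spsolve', disp=False)
    print(x.r)  # both entries should approach 1/sqrt(2)

Passing a dict exercises the per-label reporting wired up in setup_objective; a plain list of residual terms works as well.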
diff --git a/optional_test_performance.py b/chumpy/optional_test_performance.py similarity index 100% rename from optional_test_performance.py rename to chumpy/optional_test_performance.py diff --git a/reordering.py b/chumpy/reordering.py similarity index 93% rename from reordering.py rename to chumpy/reordering.py index 71cdb21..1220349 100644 --- a/reordering.py +++ b/chumpy/reordering.py @@ -288,11 +288,11 @@ def compute_r(self): def compute_dr_wrt(self, obj): if obj is self.a: if not hasattr(self, '_dr_cached'): - IS = np.arange(len(self.idxs)).flatten() + IS = np.arange(len(self.idxs)) JS = self.idxs.ravel() ij = np.vstack((row(IS), row(JS))) data = np.ones(len(self.idxs)) - self._dr_cached = sp.csc_matrix((data, ij), shape=(len(self.idxs), len(self.a.r.ravel()))) + self._dr_cached = sp.csc_matrix((data, ij), shape=(len(self.idxs), np.prod(self.a.shape))) return self._dr_cached def on_changed(self, which): @@ -383,7 +383,9 @@ def everything(self): return self._everything def compute_dr_wrt(self, wrt): - if wrt in self.dr_cached: + if not hasattr(self, 'dr_cached'): + self.dr_cached = weakref.WeakKeyDictionary() + if wrt in self.dr_cached and self.dr_cached[wrt] is not None: return self.dr_cached[wrt] if wrt not in self.our_terms: @@ -408,8 +410,15 @@ def compute_dr_wrt(self, wrt): JS = np.concatenate(JS).ravel() data = np.concatenate(data) - self.dr_cached[wrt] = sp.csc_matrix((data, (IS, JS)), shape=(self.r.size, wrt.size)) - return self.dr_cached[wrt] + res = sp.csc_matrix((data, (IS, JS)), shape=(self.r.size, wrt.size)) + + if len(self._parents.keys()) != 1: + self.dr_cached[wrt] = res + else: + self.dr_cached[wrt] = None + + return res + def expand_concatenates(mtxs, axis=0): mtxs = list(mtxs) @@ -423,11 +432,11 @@ def expand_concatenates(mtxs, axis=0): return done -def concatenate(mtxs, axis=0): +def concatenate(mtxs, axis=0, **kwargs): mtxs = expand_concatenates(mtxs, axis) - result = Concatenate() + result = Concatenate(**kwargs) result.dterms = [] for i, mtx in enumerate(mtxs): result.dterms.append('m%d' % (i,)) @@ -435,11 +444,11 @@ result.axis = axis return result -def hstack(mtxs): - return concatenate(mtxs, axis=1) +def hstack(mtxs, **kwargs): + return concatenate(mtxs, axis=1, **kwargs) -def vstack(mtxs): - return concatenate([atleast_2d(m) for m in mtxs], axis=0) +def vstack(mtxs, **kwargs): + return concatenate([atleast_2d(m) for m in mtxs], axis=0, **kwargs) -def dstack(mtxs): - return concatenate([atleast_3d(m) for m in mtxs], axis=2) +def dstack(mtxs, **kwargs): + return concatenate([atleast_3d(m) for m in mtxs], axis=2, **kwargs) diff --git a/test_ch.py b/chumpy/test_ch.py similarity index 99% rename from test_ch.py rename to chumpy/test_ch.py index 689e687..85b6bfe 100755 --- a/test_ch.py +++ b/chumpy/test_ch.py @@ -123,8 +123,8 @@ def test_dr_wrt_selection(self): def test_sum_mean_std_var(self): - for fn in [ch.sum, ch.mean,ch.var, ch.std]: - + for fn in [ch.sum, ch.mean, ch.var, ch.std]: + # Create fake input and differences in input space data1 = ch.ones((3,4,7,2)) data2 = ch.array(data1.r + .1 * np.random.rand(data1.size).reshape(data1.shape)) @@ -137,20 +137,20 @@ def test_sum_mean_std_var(self): # Empirical and predicted derivatives gt = result2.r - result1.r pred = result1.dr_wrt(data1).dot(diff.ravel()).reshape(gt.shape) - + #print np.max(np.abs(gt - pred)) - + if fn in [ch.std, ch.var]: - self.assertTrue(1e-2 > np.max(np.abs(gt - pred))) + self.assertTrue(1e-2 > np.max(np.abs(gt - pred))) else: - self.assertTrue(1e-14 > np.max(np.abs(gt - pred))) + self.assertTrue(1e-14 > np.max(np.abs(gt - pred))) # test caching dr0 = result1.dr_wrt(data1) data1[:] = np.random.randn(data1.size).reshape(data1.shape) self.assertTrue(result1.dr_wrt(data1) is dr0) # changing values shouldn't force recompute result1.axis=1 self.assertTrue(result1.dr_wrt(data1) is not dr0) - + self.assertEqual(ch.mean(ch.eye(3),axis=1).ndim, np.mean(np.eye(3),axis=1).ndim) self.assertEqual(ch.mean(ch.eye(3),axis=0).ndim, np.mean(np.eye(3),axis=0).ndim) self.assertEqual(ch.sum(ch.eye(3),axis=1).ndim, np.sum(np.eye(3),axis=1).ndim) diff --git a/test_inner_composition.py b/chumpy/test_inner_composition.py similarity index 100% rename from test_inner_composition.py rename to chumpy/test_inner_composition.py diff --git a/test_linalg.py b/chumpy/test_linalg.py similarity index 100% rename from test_linalg.py rename to chumpy/test_linalg.py diff --git a/test_optimization.py b/chumpy/test_optimization.py similarity index 83% rename from test_optimization.py rename to chumpy/test_optimization.py index 349b6a2..d752acb 100755 --- a/test_optimization.py +++ b/chumpy/test_optimization.py @@ -123,13 +123,13 @@ class TestOptimization(unittest.TestCase): def test_dogleg_rosen(self): obj, freevars = Rosen() - minimize(fun=obj, x0=freevars, method='dogleg', options={'maxiter': 337}) + 
minimize(fun=obj, x0=freevars, method='dogleg', options={'maxiter': 337, 'disp': False}) self.assertTrue(freevars[0].r[0]==1.) self.assertTrue(freevars[1].r[0]==1.) def test_dogleg_madsen(self): obj = Madsen(x = Ch(np.array((3.,1.)))) - minimize(fun=obj, x0=[obj.x], method='dogleg', options={'maxiter': 34}) + minimize(fun=obj, x0=[obj.x], method='dogleg', options={'maxiter': 34, 'disp': False}) self.assertTrue(np.sum(obj.r**2)/2 < 0.386599528247) @unittest.skip('negative sign in exponent screws with reverse mode') @@ -161,12 +161,33 @@ def gradfunc(x): #tm = time.time() x1 = scipy.optimize.fmin_bfgs(errfunc, x0, fprime=gradfunc, maxiter=8, disp=0) #print 'forward: took %.es' % (time.time() - tm,) - self.assertTrue(obj.r/2. < 0.386599528247) + self.assertLess(obj.r/2., 0.4) # Optimize with chumpy's minimize (which uses scipy's bfgs). obj.x = x0 - minimize(fun=obj, x0=[obj.x], method='bfgs', options={'maxiter': 8}) - self.assertTrue(obj.r/2. < 0.386599528247) + minimize(fun=obj, x0=[obj.x], method='bfgs', options={'maxiter': 8, 'disp': False}) + self.assertLess(obj.r/2., 0.4) + + def test_nested_select(self): + def beales(x, y): + e1 = 1.5 - x + x*y + e2 = 2.25 - x + x*(y**2) + e3 = 2.625 - x + x*(y**3) + return {'e1': e1, 'e2': e2, 'e3': e3} + + x1 = ch.zeros(10) + y1 = ch.zeros(10) + + # With a single select this worked + minimize(beales(x1, y1), x0=[x1[1:4], y1], method='dogleg', options={'disp': False}) + + x2 = ch.zeros(10) + y2 = ch.zeros(10) + + # But this used to raise `AttributeError: 'Select' object has no attribute 'x'` + minimize(beales(x2, y2), x0=[x2[1:8][:3], y2], method='dogleg', options={'disp': False}) + np.testing.assert_array_equal(x1, x2) + np.testing.assert_array_equal(y1, y2) suite = unittest.TestLoader().loadTestsFromTestCase(TestOptimization) diff --git a/testing.py b/chumpy/testing.py similarity index 100% rename from testing.py rename to chumpy/testing.py diff --git a/chumpy/utils.py b/chumpy/utils.py new file mode 100644 index 0000000..19287d3 --- /dev/null +++ b/chumpy/utils.py @@ -0,0 +1,93 @@ +""" +Author(s): Matthew Loper + +See LICENCE.txt for licensing and contact information. +""" +import scipy.sparse as sp +import numpy as np + +def row(A): + return A.reshape((1, -1)) + + +def col(A): + return A.reshape((-1, 1)) + +class timer(object): + def time(self): + import time + return time.time() + def __init__(self): + self._elapsed = 0 + self._start = self.time() + def __call__(self): + if self._start is not None: + return self._elapsed + self.time() - self._start + else: + return self._elapsed + def pause(self): + assert self._start is not None + self._elapsed += self.time() - self._start + self._start = None + def resume(self): + assert self._start is None + self._start = self.time() + +def dfs_do_func_on_graph(node, func, *args, **kwargs): + ''' + invoke func on each node of the dr graph + ''' + for _node in node.tree_iterator(): + func(_node, *args, **kwargs) + + +def sparse_is_desireable(lhs, rhs): + ''' + Examines a pair of matrices and determines if the result of their multiplication should be sparse or not. 
+ ''' + return False + if len(lhs.shape) == 1: + return False + else: + lhs_rows, lhs_cols = lhs.shape + + if len(rhs.shape) == 1: + rhs_rows = 1 + rhs_cols = rhs.size + else: + rhs_rows, rhs_cols = rhs.shape + + result_size = lhs_rows * rhs_cols + + if sp.issparse(lhs) and sp.issparse(rhs): + return True + elif sp.issparse(lhs): + lhs_zero_rows = lhs_rows - np.unique(lhs.nonzero()[0]).size + rhs_zero_cols = np.all(rhs==0, axis=0).sum() + + elif sp.issparse(rhs): + lhs_zero_rows = np.all(lhs==0, axis=1).sum() + rhs_zero_cols = rhs_cols- np.unique(rhs.nonzero()[1]).size + else: + lhs_zero_rows = np.all(lhs==0, axis=1).sum() + rhs_zero_cols = np.all(rhs==0, axis=0).sum() + + num_zeros = lhs_zero_rows * rhs_cols + rhs_zero_cols * lhs_rows - lhs_zero_rows * rhs_zero_cols + + # A sparse matrix uses roughly 16 bytes per nonzero element (8 + 2 4-byte inds), while a dense matrix uses 8 bytes per element. So the break even point for sparsity is 50% nonzero. But in practice, it seems to be that the compression in a csc or csr matrix gets us break even at ~65% nonzero, which lets us say 50% is a conservative, worst cases cutoff. + return (float(num_zeros) / float(size)) >= 0.5 + + +def convert_inputs_to_sparse_if_necessary(lhs, rhs): + ''' + This function checks to see if a sparse output is desireable given the inputs and if so, casts the inputs to sparse in order to make it so. + ''' + if not sp.issparse(lhs) or not sp.issparse(rhs): + if sparse_is_desireable(lhs, rhs): + if not sp.issparse(lhs): + lhs = sp.csc_matrix(lhs) + #print "converting lhs into sparse matrix" + if not sp.issparse(rhs): + rhs = sp.csc_matrix(rhs) + #print "converting rhs into sparse matrix" + return lhs, rhs diff --git a/version.py b/chumpy/version.py similarity index 71% rename from version.py rename to chumpy/version.py index bb53753..30ceaf1 100644 --- a/version.py +++ b/chumpy/version.py @@ -1,3 +1,3 @@ -version = '0.66' +version = '0.67.1' short_version = version full_version = version diff --git a/circle.yml b/circle.yml new file mode 100644 index 0000000..0300127 --- /dev/null +++ b/circle.yml @@ -0,0 +1,18 @@ +general: + branches: + ignore: + - /zz.*/ # Don't run tests on deprecated branches. + +machine: + environment: + PYTHONPATH: /usr/local/lib/python2.7/dist-packages + +dependencies: + pre: + - sudo apt-get update + # Is gfortran needed? This was copied from `.travis.yml` which included it. + - sudo apt-get install -qq python-dev gfortran pkg-config liblapack-dev + +test: + override: + - make test diff --git a/optimization.py b/optimization.py deleted file mode 100755 index 16d6ef9..0000000 --- a/optimization.py +++ /dev/null @@ -1,526 +0,0 @@ -#!/usr/bin/env python - -""" -Author(s): Matthew Loper - -See LICENCE.txt for licensing and contact information. 
-""" - -__all__ = ['minimize'] - -import time -import math -import sys -import time -import numpy as np -from numpy.linalg import norm - -import ch, utils -from utils import row, col - -import scipy.sparse as sp -import scipy.sparse -import scipy.optimize -from scipy.sparse.linalg.interface import LinearOperator - - - -def vstack(x): - x = [a if not isinstance(a, LinearOperator) else a.dot(np.eye(a.shape[1])) for a in x] - return sp.vstack(x, format='csc') if any([sp.issparse(a) for a in x]) else np.vstack(x) -def hstack(x): - x = [a if not isinstance(a, LinearOperator) else a.dot(np.eye(a.shape[1])) for a in x] - return sp.hstack(x, format='csc') if any([sp.issparse(a) for a in x]) else np.hstack(x) - - -# Nelder-Mead -# Powell -# CG -# BFGS -# Newton-CG -# Anneal -# L-BFGS-B -# TNC -# COBYLA -# SLSQP -# dogleg -# trust-ncg -def minimize(fun, x0, method='dogleg', bounds=None, constraints=(), tol=None, callback=None, options=None): - - if method == 'dogleg': - if options is None: options = {} - return _minimize_dogleg(fun, free_variables=x0, on_step=callback, **options) - - if isinstance(fun, list) or isinstance(fun, tuple): - fun = ch.concatenate([f.ravel() for f in fun]) - if isinstance(fun, dict): - fun = ch.concatenate([f.ravel() for f in fun.values()]) - obj = fun - free_variables = x0 - - - from ch import SumOfSquares - - hessp = None - hess = None - if obj.size == 1: - obj_scalar = obj - else: - obj_scalar = SumOfSquares(obj) - - def hessp(vs, p,obj, obj_scalar, free_variables): - changevars(vs,obj,obj_scalar,free_variables) - if not hasattr(hessp, 'vs'): - hessp.vs = vs*0+1e16 - if np.max(np.abs(vs-hessp.vs)) > 0: - - J = ns_jacfunc(vs,obj,obj_scalar,free_variables) - hessp.J = J - hessp.H = 2. * J.T.dot(J) - hessp.vs = vs - return np.array(hessp.H.dot(p)).ravel() - #return 2*np.array(hessp.J.T.dot(hessp.J.dot(p))).ravel() - - if method.lower() != 'newton-cg': - def hess(vs, obj, obj_scalar, free_variables): - changevars(vs,obj,obj_scalar,free_variables) - if not hasattr(hessp, 'vs'): - hessp.vs = vs*0+1e16 - if np.max(np.abs(vs-hessp.vs)) > 0: - J = ns_jacfunc(vs,obj,obj_scalar,free_variables) - hessp.H = 2. 
-
-    def changevars(vs, obj, obj_scalar, free_variables):
-        cur = 0
-        changed = False
-        for idx, freevar in enumerate(free_variables):
-            sz = freevar.r.size
-            newvals = vs[cur:cur+sz].copy().reshape(free_variables[idx].shape)
-            if np.max(np.abs(newvals-free_variables[idx]).ravel()) > 0:
-                free_variables[idx][:] = newvals
-                changed = True
-            cur += sz
-
-        methods_without_callback = ('anneal', 'powell', 'cobyla', 'slsqp')
-        if callback is not None and changed and method.lower() in methods_without_callback:
-            callback(None)
-
-        return changed
-
-    def residuals(vs, obj, obj_scalar, free_variables):
-        changevars(vs, obj, obj_scalar, free_variables)
-        residuals = obj_scalar.r.ravel()[0]
-        return residuals
-
-    def scalar_jacfunc(vs, obj, obj_scalar, free_variables):
-        if not hasattr(scalar_jacfunc, 'vs'):
-            scalar_jacfunc.vs = vs*0+1e16
-        if np.max(np.abs(vs-scalar_jacfunc.vs)) == 0:
-            return scalar_jacfunc.J
-
-        changevars(vs, obj, obj_scalar, free_variables)
-
-        if True: # faster, at least on some problems
-            result = np.concatenate([np.array(obj_scalar.lop(wrt, np.array([[1]]))).ravel() for wrt in free_variables])
-        else:
-            jacs = [obj_scalar.dr_wrt(wrt) for wrt in free_variables]
-            for idx, jac in enumerate(jacs):
-                if sp.issparse(jac):
-                    jacs[idx] = jacs[idx].todense()
-            result = np.concatenate([jac.ravel() for jac in jacs])
-
-        scalar_jacfunc.J = result
-        scalar_jacfunc.vs = vs
-        return result.ravel()
-
-    def ns_jacfunc(vs, obj, obj_scalar, free_variables):
-        if not hasattr(ns_jacfunc, 'vs'):
-            ns_jacfunc.vs = vs*0+1e16
-        if np.max(np.abs(vs-ns_jacfunc.vs)) == 0:
-            return ns_jacfunc.J
-
-        changevars(vs, obj, obj_scalar, free_variables)
-        jacs = [obj.dr_wrt(wrt) for wrt in free_variables]
-        result = hstack(jacs)
-
-        ns_jacfunc.J = result
-        ns_jacfunc.vs = vs
-        return result
-
-    x1 = scipy.optimize.minimize(
-        method=method,
-        fun=residuals,
-        callback=callback,
-        x0=np.concatenate([free_variable.r.ravel() for free_variable in free_variables]),
-        jac=scalar_jacfunc,
-        hessp=hessp, hess=hess, args=(obj, obj_scalar, free_variables),
-        bounds=bounds, constraints=constraints, tol=tol, options=options).x
-
-    changevars(x1, obj, obj_scalar, free_variables)
-    return free_variables
-
-
-_giter = 0
-class ChInputsStacked(ch.Ch):
-    dterms = 'x', 'obj'
-    terms = 'free_variables'
-
-    def compute_r(self):
-        return self.obj.r.ravel()
-
-    # def compute_dr_wrt(self, wrt):
-    #     if wrt is self.x:
-    #         return hstack([self.obj.dr_wrt(freevar) for freevar in self.free_variables])
-
-    def dr_wrt(self, wrt):
-        if wrt is self.x:
-            mtxs = []
-            for freevar in self.free_variables:
-                if isinstance(freevar, ch.Select):
-                    new_mtx = self.obj.dr_wrt(freevar.a)
-                    try:
-                        mtxs.append(new_mtx[:,freevar.idxs])
-                    except:
-                        mtxs.append(new_mtx.tocsc()[:,freevar.idxs])
-                else:
-                    mtxs.append(self.obj.dr_wrt(freevar))
-            return hstack(mtxs)
-            #return hstack([self.obj.dr_wrt(freevar) for freevar in self.free_variables])
-
-    def on_changed(self, which):
-        global _giter
-        _giter += 1
-
-        if 'x' in which:
-            pos = 0
-            for idx, freevar in enumerate(self.free_variables):
-                sz = freevar.r.size
-                rng = np.arange(pos, pos+sz)
-
-                if isinstance(self.free_variables[idx], ch.Select):
-                    newv = self.free_variables[idx].a.x.copy()
-                    newv.ravel()[self.free_variables[idx].idxs] = self.x.r[rng]
-                    self.free_variables[idx].a.__setattr__('x', newv, _giter)
-                    #self.free_variables[idx].a.x = newv
-                elif isinstance(self.free_variables[idx].x, np.ndarray):
-                    #self.free_variables[idx].x = self.x.r[rng].copy().reshape(self.free_variables[idx].x.shape)
-                    self.free_variables[idx].__setattr__('x', self.x.r[rng].copy().reshape(self.free_variables[idx].x.shape), _giter)
-                else: # a number
-                    #self.free_variables[idx].x = self.x.r[rng]
-                    self.free_variables[idx].__setattr__('x', self.x.r[rng], _giter)
-                    #self.free_variables[idx] = self.obj.replace(freevar, Ch(self.x.r[rng].copy()))
-                pos += sz
-
-    @property
-    def J(self):
-        result = self.dr_wrt(self.x).copy()
-        return np.atleast_2d(result) if not sp.issparse(result) else result
-
-    def JT_dot(self, y):
-        return self.J.T.dot(y)
-
-    def J_dot(self, y):
-        return self.J.dot(y)
-
-    # Have not observed this to be faster than just using J directly
-    def JTJ(self):
-        if False:
-            return self.J.T.dot(self.J)
-        else:
-            Js = [self.obj.dr_wrt(freevar) for freevar in self.free_variables]
-            zeroArray = [None]*len(Js)
-            A = [zeroArray[:] for i in range(len(Js))]
-            for y in range(len(Js)):
-                for x in range(len(Js)):
-                    if y > x:
-                        A[y][x] = A[x][y].T
-                    else:
-                        A[y][x] = Js[y].T.dot(Js[x])
-            return vstack([hstack(A[y]) for y in range(len(Js))])
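`JTJ` above exploits the symmetry of JᵀJ: each block Js[y].T.dot(Js[x]) is computed once for y <= x, and the mirrored block below the diagonal is filled with its transpose. The same pattern in isolation, with hypothetical dense blocks (editorial sketch):

    import numpy as np

    Js = [np.random.randn(5, 2), np.random.randn(5, 3)]  # per-variable Jacobian blocks
    n = len(Js)
    A = [[None] * n for _ in range(n)]
    for y in range(n):
        for x in range(n):
            # J^T J is symmetric, so lower-triangle blocks are transposes
            # of blocks already computed above the diagonal.
            A[y][x] = A[x][y].T if y > x else Js[y].T.dot(Js[x])
    JTJ = np.vstack([np.hstack(row) for row in A])

    assert np.allclose(JTJ, np.hstack(Js).T.dot(np.hstack(Js)))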
-
-
-_solver_fns = {
-    'cg': lambda A, x, M=None : scipy.sparse.linalg.cg(A, x, M=M, tol=1e-10)[0],
-    'spsolve': lambda A, x : scipy.sparse.linalg.spsolve(A, x)
-}
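For orientation before the solver body below: each dogleg iteration chooses between the steepest-descent (Cauchy) point `d_sd` and the Gauss-Newton step `d_gn`, blending the two at the trust-region boundary. This sketch paraphrases that selection rule with plain 1-D arrays; the deleted code computes the same boundary intersection via an algebraically equivalent rationalized form (editorial illustration, not the shipped code):

    import numpy as np
    from numpy.linalg import norm

    def dogleg_step(d_sd, d_gn, delta):
        if norm(d_sd) >= delta:
            # Cauchy point already outside the trust region: truncate it.
            return (delta / norm(d_sd)) * d_sd
        if norm(d_gn) <= delta:
            # Gauss-Newton step fits inside the trust region: take it whole.
            return d_gn
        # Otherwise walk from d_sd toward d_gn until the boundary: beta
        # solves ||d_sd + beta*(d_gn - d_sd)|| = delta (positive root).
        diff = d_gn - d_sd
        a = diff.dot(diff)
        b = 2. * d_sd.dot(diff)
        c = d_sd.dot(d_sd) - delta**2
        beta = (-b + np.sqrt(b*b - 4.*a*c)) / (2.*a)
        return d_sd + beta * diff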
-
-
-def _minimize_dogleg(obj, free_variables, on_step=None,
-                     maxiter=200, max_fevals=np.inf, sparse_solver='spsolve',
-                     disp=False, show_residuals=None, e_1=1e-15, e_2=1e-15, e_3=0., delta_0=None):
-
-    """Nonlinear optimization using Powell's dogleg method.
-
-    See Lourakis et al, 2005, ICCV '05, "Is Levenberg-Marquardt
-    the Most Efficient Optimization for Implementing Bundle
-    Adjustment?":
-    http://www.ics.forth.gr/cvrl/publications/conferences/0201-P0401-lourakis-levenberg.pdf
-    """
-
-    import warnings
-    if show_residuals is not None:
-        import warnings
-        warnings.warn('minimize_dogleg: show_residuals param is deprecated, pass a dict instead.')
-
-    labels = {}
-    if isinstance(obj, list) or isinstance(obj, tuple):
-        obj = ch.concatenate([f.ravel() for f in obj])
-    elif isinstance(obj, dict):
-        labels = obj
-        obj = ch.concatenate([f.ravel() for f in obj.values()])
-
-    niters = maxiter
-    verbose = disp
-    num_unique_ids = len(np.unique(np.array([id(freevar) for freevar in free_variables])))
-    if num_unique_ids != len(free_variables):
-        raise Exception('The "free_variables" param contains duplicate variables.')
-
-    obj = ChInputsStacked(obj=obj, free_variables=free_variables, x=np.concatenate([freevar.r.ravel() for freevar in free_variables]))
-
-    def call_cb():
-        if on_step is not None:
-            on_step(obj)
-
-        report_line = ""
-        if len(labels) > 0:
-            report_line += '%.2e | ' % (np.sum(obj.r**2),)
-        for label in sorted(labels.keys()):
-            objective = labels[label]
-            report_line += '%s: %.2e | ' % (label, np.sum(objective.r**2))
-        if len(labels) > 0:
-            report_line += '\n'
-        sys.stderr.write(report_line)
-
-    call_cb()
-
-    # pif = print-if-verbose.
-    # can't use "print" because it's a statement, not a fn
-    pif = lambda x: sys.stdout.write(x + '\n') if verbose else 0
-
-    if callable(sparse_solver):
-        solve = sparse_solver
-    elif isinstance(sparse_solver, str) and sparse_solver in _solver_fns.keys():
-        solve = _solver_fns[sparse_solver]
-    else:
-        raise Exception('sparse_solver argument must be either a string in the set (%s) or have the api of scipy.sparse.linalg.spsolve.'
-                        % ' '.join(_solver_fns.keys()))
-
-    # optimization parms
-    k_max = niters
-    fevals = 0
-
-    k = 0
-    delta = delta_0
-    p = col(obj.x.r)
-
-    fevals += 1
-
-    tm = time.time()
-    pif('computing Jacobian...')
-    J = obj.J
-
-    if sp.issparse(J):
-        assert(J.nnz > 0)
-    pif('Jacobian (%dx%d) computed in %.2fs' % (J.shape[0], J.shape[1], time.time() - tm))
-
-    if J.shape[1] != p.size:
-        import pdb; pdb.set_trace()
-    assert(J.shape[1] == p.size)
-
-    tm = time.time()
-    pif('updating A and g...')
-    A = J.T.dot(J)
-    r = col(obj.r.copy())
-
-    g = col(J.T.dot(-r))
-    pif('A and g updated in %.2fs' % (time.time() - tm))
-
-    stop = norm(g, np.inf) < e_1
-    while (not stop) and (k < k_max) and (fevals < max_fevals):
-        k += 1
-        pif('beginning iteration %d' % (k,))
-        d_sd = col(sqnorm(g) / sqnorm(J.dot(g)) * g)
-        GNcomputed = False
-
-        while True:
-            # if the Cauchy point is outside the trust region,
-            # take that direction but only to the edge of the trust region
-            if delta is not None and norm(d_sd) >= delta:
-                pif('PROGRESS: Using stunted cauchy')
-                d_dl = np.array(col(delta/norm(d_sd) * d_sd))
-            else:
-                if not GNcomputed:
-                    tm = time.time()
-                    if scipy.sparse.issparse(A):
-                        A.eliminate_zeros()
-                        pif('sparse solve...sparsity infill is %.3f%% (hessian %dx%d), J infill %.3f%%' % (
-                            100. * A.nnz / (A.shape[0] * A.shape[1]),
-                            A.shape[0],
-                            A.shape[1],
-                            100. * J.nnz / (J.shape[0] * J.shape[1])))
-                        if g.size > 1:
-                            d_gn = col(solve(A, g))
-                            if np.any(np.isnan(d_gn)) or np.any(np.isinf(d_gn)):
-                                from scipy.sparse.linalg import lsqr
-                                d_gn = col(lsqr(A, g)[0])
-                        else:
-                            d_gn = np.atleast_1d(g.ravel()[0]/A[0,0])
-                        pif('sparse solve...done in %.2fs' % (time.time() - tm))
-                    else:
-                        pif('dense solve...')
-                        try:
-                            d_gn = col(np.linalg.solve(A, g))
-                        except Exception:
-                            d_gn = col(np.linalg.lstsq(A, g)[0])
-                        pif('dense solve...done in %.2fs' % (time.time() - tm))
-                    GNcomputed = True
-
-                # if the gauss-newton solution is within the trust region, use it
-                if delta is None or norm(d_gn) <= delta:
-                    pif('PROGRESS: Using gauss-newton solution')
-                    d_dl = np.array(d_gn)
-                    if delta is None:
-                        delta = norm(d_gn)
-                else: # between cauchy step and gauss-newton step
-                    pif('PROGRESS: between cauchy and gauss-newton')
-
-                    # compute beta multiplier
-                    delta_sq = delta**2
-                    pnow = (
-                        (d_gn-d_sd).T.dot(d_gn-d_sd)*delta_sq
-                        + d_gn.T.dot(d_sd)**2
-                        - sqnorm(d_gn) * sqnorm(d_sd))
-                    B = delta_sq - sqnorm(d_sd)
-                    B /= ((d_gn-d_sd).T.dot(d_sd) + math.sqrt(pnow))
-
-                    # apply step
-                    d_dl = np.array(d_sd + float(B) * (d_gn - d_sd))
-
-            #assert(math.fabs(norm(d_dl) - delta) < 1e-12)
-            if norm(d_dl) <= e_2*norm(p):
-                pif('stopping because of small step size (norm_dl < %.2e)' % (e_2*norm(p)))
-                stop = True
-            else:
-                p_new = p + d_dl
-
-                tm_residuals = time.time()
-                obj.x = p_new
-                fevals += 1
-
-                r_trial = obj.r.copy()
-                tm_residuals = time.time() - tm_residuals
-
-                # rho is the ratio of...
-                # (improvement in SSE) / (predicted improvement in SSE)
-
-                # slower:
-                #rho = norm(e_p)**2 - norm(e_p_trial)**2
-                #rho = rho / (L(d_dl*0, e_p, J) - L(d_dl, e_p, J))
-
-                # faster:
-                sqnorm_ep = sqnorm(r)
-                rho = sqnorm_ep - norm(r_trial)**2
-
-                with warnings.catch_warnings():
-                    warnings.filterwarnings('ignore', category=RuntimeWarning)
-                    if rho > 0:
-                        rho /= predicted_improvement(d_dl, -r, J, sqnorm_ep, A, g)
-
-                improvement_occurred = rho > 0
-                # if the objective function improved, update input parameter estimate.
-                # Note that the obj.x already has the new parms,
-                # and we should not set them again to the same (or we'll bust the cache)
-                if improvement_occurred:
-                    p = col(p_new)
-                    call_cb()
-
-                    if (sqnorm_ep - norm(r_trial)**2) / sqnorm_ep < e_3:
-                        stop = True
-                        pif('stopping because improvement < %.1e%%' % (100*e_3,))
-
-                else: # Put the old parms back
-                    obj.x = ch.Ch(p)
-                    obj.on_changed('x') # copies from flat vector to free variables
-
-                # if the objective function improved and we're not done,
-                # get ready for the next iteration
-                if improvement_occurred and not stop:
-                    tm_jac = time.time()
-                    pif('computing Jacobian...')
-                    J = obj.J.copy()
-                    tm_jac = time.time() - tm_jac
-                    pif('Jacobian (%dx%d) computed in %.2fs' % (J.shape[0], J.shape[1], tm_jac))
-
-                    pif('Residuals+Jac computed in %.2fs' % (tm_jac + tm_residuals,))
-
-                    tm = time.time()
-                    pif('updating A and g...')
-                    A = J.T.dot(J)
-                    r = col(r_trial)
-                    g = col(J.T.dot(-r))
-                    pif('A and g updated in %.2fs' % (time.time() - tm))
-
-                    if norm(g, np.inf) < e_1:
-                        stop = True
-                        pif('stopping because norm(g, np.inf) < %.2e' % (e_1))
-
-                # update our trust region
-                delta = updateRadius(rho, delta, d_dl)
-
-                if delta <= e_2*norm(p):
-                    stop = True
-                    pif('stopping because trust region is too small')
-
-            # the following "collect" is very expensive.
-            # please contact matt if you find situations where it actually helps things.
-            #import gc; gc.collect()
-            if stop or improvement_occurred or (fevals >= max_fevals):
-                break
-
-    if k >= k_max:
-        pif('stopping because max number of user-specified iterations (%d) has been met' % (k_max,))
-    elif fevals >= max_fevals:
-        pif('stopping because max number of user-specified func evals (%d) has been met' % (max_fevals,))
-
-    return obj.free_variables
-
-
-def sqnorm(a):
-    return norm(a)**2
-
-def updateRadius(rho, delta, d_dl, lb=.05, ub=.9):
-    if rho > ub:
-        delta = max(delta, 2.5*norm(d_dl))
-    elif rho < lb:
-        delta *= .25
-    return delta
-
-
-def predicted_improvement(d, e, J, sqnorm_e, JTJ, JTe):
-    d = col(d)
-    e = col(e)
-    aa = .5 * sqnorm_e
-    bb = JTe.T.dot(d)
-    c1 = .5 * d.T
-    c2 = JTJ
-    c3 = d
-    cc = c1.dot(c2.dot(c3))
-    result = 2. * (aa - bb + cc)[0,0]
-    return sqnorm_e - result
-
-
-def main():
-    pass
-
-
-if __name__ == '__main__':
-    main()
-
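The gain ratio `rho` above divides the actual drop in squared error by the drop predicted by the linearization r(p + d) ≈ r(p) + J d, and `updateRadius` grows or shrinks the trust region accordingly. A small numeric check that the expanded form used by `predicted_improvement` matches the direct linear-model computation (editorial sketch with made-up J, r, and d):

    import numpy as np
    from numpy.linalg import norm

    J = np.array([[1., .5], [0., 2.], [1., 1.]])
    r = np.array([[.3], [-.2], [.1]])   # column residual, as in the code above
    d = np.array([[-.05], [.04]])       # candidate step

    # Direct form: ||r||^2 - ||r + J d||^2
    direct = norm(r)**2 - norm(r + J.dot(d))**2

    # Expanded form from predicted_improvement(), with e = -r and JTe = J^T e
    # (the caller passes g = J^T(-r) for JTe):
    e = -r
    expanded = norm(e)**2 - 2. * (.5 * norm(e)**2 - (J.T.dot(e)).T.dot(d)
                                  + .5 * d.T.dot(J.T.dot(J)).dot(d))[0, 0]

    assert np.isclose(direct, expanded)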
diff --git a/requirements.txt b/requirements.txt
index 4402122..7125758 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-
--e .
+numpy>=1.8.1
+scipy>=0.13.0,<0.19.0
diff --git a/setup.py b/setup.py
index 69d0006..cc55079 100644
--- a/setup.py
+++ b/setup.py
@@ -5,19 +5,21 @@
 """
 from distutils.core import setup
-from version import version
+import importlib
+from pip.req import parse_requirements
+
+install_reqs = parse_requirements('requirements.txt', session=False)
+install_requires = [str(ir.req) for ir in install_reqs]
 
 setup(name='chumpy',
-      version=version,
-      #py_modules=['ch', 'ch_ops', 'linalg', 'utils', 'api_compatibility', 'ch_random', 'test_ch', 'test_inner_composition', 'test_linalg'],
+      version=importlib.import_module('chumpy').__version__,
       packages = ['chumpy'],
-      package_dir = {'chumpy': '.'},
       author='Matthew Loper',
       author_email='matt.loper@gmail.com',
       url='https://github.com/mattloper/chumpy',
       description='chumpy',
       license='MIT',
-      install_requires=['numpy >= 1.8.1', 'scipy >= 0.13.0'],
+      install_requires=install_requires,
 
       # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
       classifiers=[
diff --git a/utils.py b/utils.py
deleted file mode 100644
index 18056d3..0000000
--- a/utils.py
+++ /dev/null
@@ -1,12 +0,0 @@
-"""
-Author(s): Matthew Loper
-
-See LICENCE.txt for licensing and contact information.
-"""
-
-def row(A):
-    return A.reshape((1, -1))
-
-
-def col(A):
-    return A.reshape((-1, 1))
\ No newline at end of file
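One caution on the setup.py change above: `pip.req.parse_requirements` is a pip-internal API, not a stable one, so builds pinned to other pip versions may prefer a dependency-free equivalent. A sketch that yields the same `install_requires` for a flat requirements file like this one (hypothetical helper, assuming one specifier per line):

    def parse_simple_requirements(path='requirements.txt'):
        # Minimal stand-in for pip.req.parse_requirements: one specifier per
        # line, skipping blanks, comments, and editable installs ('-e ...').
        # Sufficient for this project's two-line requirements.txt only.
        with open(path) as f:
            lines = (line.strip() for line in f)
            return [l for l in lines
                    if l and not l.startswith('#') and not l.startswith('-e')]

    install_requires = parse_simple_requirements()
    # -> ['numpy>=1.8.1', 'scipy>=0.13.0,<0.19.0']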