Skip to content

Commit

Permalink
Merge 923ed6b into 69ae0b4
Browse files Browse the repository at this point in the history
  • Loading branch information
ferrine committed Feb 20, 2017
2 parents 69ae0b4 + 923ed6b commit 8a64ba4
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 72 deletions.
24 changes: 23 additions & 1 deletion pymc3/data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pkgutil
import io

from copy import copy
import theano.tensor as tt
from .vartypes import isgenerator

Expand Down Expand Up @@ -29,6 +29,28 @@ class DataGenerator(object):
Helper class that infers the data type of a generator by looking
at its first item, while preserving the order of the resulting generator
"""

class Variable(tt.TensorVariable):
    """Tensor variable that remembers the op it was created for.

    The variable forwards ``set_gen`` and ``set_default`` to the stored
    op, so callers can reconfigure the op through the variable itself.
    """

    def __init__(self, op, type, name=None):
        """Build the variable with the given type/name and keep ``op``."""
        super(DataGenerator.Variable, self).__init__(type=type, name=name)
        self.op = op

    def set_gen(self, gen):
        """Forward ``gen`` to ``self.op.set_gen``."""
        self.op.set_gen(gen)

    def set_default(self, value):
        """Forward ``value`` to ``self.op.set_default``."""
        self.op.set_default(value)

    def clone(self):
        """Return a new variable of the same class sharing op, type and name.

        The ``tag`` attribute of the result is a shallow copy of this
        variable's ``tag``.
        """
        duplicate = self.__class__(self.op, self.type, self.name)
        duplicate.tag = copy(self.tag)
        return duplicate

def make_variable(self, gop, name=None):
    """Create a ``Variable`` bound to ``gop`` with this object's tensor type.

    The new variable's ``tag.test_value`` is set to ``self.test_value``
    before it is returned.
    """
    result = self.Variable(gop, self.tensortype, name)
    result.tag.test_value = self.test_value
    return result

def __init__(self, generator):
if not isgenerator(generator):
raise TypeError('Object should be generator like')
Expand Down
56 changes: 42 additions & 14 deletions pymc3/tests/test_model.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
import unittest
from theano import theano, tensor as tt
import scipy.stats as stats
import numpy as np
import pymc3 as pm
from pymc3.distributions import HalfCauchy, Normal
from pymc3 import Potential, Deterministic
from pymc3.theanof import generator



def gen1():
    """Yield an endless sequence of (10, 100) float arrays.

    The k-th array (counting from zero) has every entry equal to k.
    """
    template = np.ones((10, 100))
    step = 0
    while True:
        # Multiplication allocates a fresh array for every item.
        yield template * step
        step += 1


def gen2():
    """Yield an endless sequence of (20, 100) float arrays.

    The k-th array (counting from zero) has every entry equal to k.
    """
    template = np.ones((20, 100))
    step = 0
    while True:
        # Multiplication allocates a fresh array for every item.
        yield template * step
        step += 1

class NewModel(pm.Model):
def __init__(self, name='', model=None):
super(NewModel, self).__init__(name, model)
Expand Down Expand Up @@ -135,18 +150,12 @@ def test_density_scaling(self):

def test_density_scaling_with_genarator(self):
# We have different size generators
def gen1():
i = 0
while True:
yield np.ones((10, 100)) * i
i += 1

def gen2():
i = 0
while True:
yield np.ones((20, 100)) * i
i += 1

def true_dens():
g = gen1()
for i, point in enumerate(g):
yield stats.norm.logpdf(point * i).sum() * 10
t = true_dens()
# We have same size models
with pm.Model() as model1:
Normal('n', observed=gen1(), total_size=100)
Expand All @@ -157,7 +166,26 @@ def gen2():
Normal('n', observed=gen_var, total_size=100)
p2 = theano.function([], model2.logpt)

# We want densities to be equal
for _ in range(10):
np.testing.assert_almost_equal(p1(), p2())
for i in range(10):
_1, _2, _t = p1(), p2(), next(t)
np.testing.assert_almost_equal(_1, _t)
np.testing.assert_almost_equal(_1, _2)
# Done

def test_gradient_with_scaling(self):
    """Compare d(logp)/dm of a generator-fed model and a shared-variable model.

    Model 1 observes minibatches from ``gen1()`` with ``total_size=1000``;
    model 2 observes a theano shared variable. Both gradients are
    evaluated at ``m = 1`` ten times and must agree.
    """
    # NOTE(review): ``m`` is not passed as a parameter of ``n`` in either
    # model, so the gradient w.r.t. ``m`` presumably does not depend on the
    # observed data -- confirm that this exercises the scaling as intended.
    with pm.Model() as model1:
        gen_obs = generator(gen1())
        m = Normal('m')
        Normal('n', observed=gen_obs, total_size=1000)
        grad_from_generator = theano.function([m], tt.grad(model1.logpt, m))

    with pm.Model() as model2:
        m = Normal('m')
        shared_obs = theano.shared(np.ones((1000, 100)))
        Normal('n', observed=shared_obs)
        grad_from_shared = theano.function([m], tt.grad(model2.logpt, m))

    for step in range(10):
        shared_obs.set_value(np.ones((100, 100)) * step)
        from_generator = grad_from_generator(1)
        from_shared = grad_from_shared(1)
        np.testing.assert_almost_equal(from_generator, from_shared)
39 changes: 17 additions & 22 deletions pymc3/tests/test_theanof.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,34 +48,29 @@ def test_cloning_available(self):
f = theano.function([], res1)
self.assertEqual(f(), np.float32(100))

def test_nans_produced(self):
def test_default_value(self):
def gen():
for _ in range(2):
yield np.ones((10, 10))
for i in range(2):
yield np.ones((10, 10)) * i

gop = generator(gen())
gop = generator(gen(), np.ones((10, 10)) * 10)
f = theano.function([], gop)
res = [f() for _ in range(3)]
np.testing.assert_equal(np.ones((10, 10)), res[0])
np.testing.assert_equal(np.ones((10, 10)), res[1])
self.assertTrue(res[2].shape == (10, 10))
self.assertTrue(np.isnan(res[2]).all())
np.testing.assert_equal(np.ones((10, 10)) * 0, f())
np.testing.assert_equal(np.ones((10, 10)) * 1, f())
np.testing.assert_equal(np.ones((10, 10)) * 10, f())
self.assertRaises(ValueError, gop.set_default, 1)

def test_setvalue(self):
def test_set_gen_and_exc(self):
def gen():
for _ in range(2):
yield np.ones((10, 10))
for i in range(2):
yield np.ones((10, 10)) * i

gop = generator(gen())
f = theano.function([], gop)
res = [f() for _ in range(3)]
np.testing.assert_equal(np.ones((10, 10)), res[0])
np.testing.assert_equal(np.ones((10, 10)), res[1])
self.assertTrue(res[2].shape == (10, 10))
self.assertTrue(np.isnan(res[2]).all())
np.testing.assert_equal(np.ones((10, 10)) * 0, f())
np.testing.assert_equal(np.ones((10, 10)) * 1, f())
self.assertRaises(StopIteration, f)
gop.set_gen(gen())
res = [f() for _ in range(3)]
np.testing.assert_equal(np.ones((10, 10)), res[0])
np.testing.assert_equal(np.ones((10, 10)), res[1])
self.assertTrue(res[2].shape == (10, 10))
self.assertTrue(np.isnan(res[2]).all())
np.testing.assert_equal(np.ones((10, 10)) * 0, f())
np.testing.assert_equal(np.ones((10, 10)) * 1, f())
self.assertRaises(StopIteration, f)
94 changes: 59 additions & 35 deletions pymc3/theanof.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from .vartypes import typefilter, continuous_types
from theano import theano, scalar, tensor as tt
from theano.gof.graph import inputs
from theano.gof import Op
from theano.gof import Op, Container
from theano.configparser import change_flags
from .memoize import memoize
from .blocking import ArrayOrdering
Expand Down Expand Up @@ -244,51 +244,45 @@ def __call__(self, input):

class GeneratorOp(Op):
"""
Generator Op is designed for storing python generators
inside theano graph. The main limitation is generator itself.
There are some important cases when generator becomes exhausted
- not endless generator is passed
- exception is raised while `generator.__next__` is performed
Note: it is dangerous in simple python generators, but ok in
custom class based generators with explicit state
- data type on each iteration should be the same
Generator Op is designed for storing python generators inside theano graph.
__call__ creates TensorVariable
It has 2 new methods (assigned externally)
- var.set_gen(gen) : sets new generator
- var.set_default(value) : sets new default value (None erases default value)
If the generator is exhausted, the variable will produce the default value if it is not None,
else it raises a `StopIteration` exception that can be caught at runtime.
Parameters
----------
gen : generator that implements __next__ (py3) or next (py2) method
and yields np.arrays with same types
default : np.array with the same type as generator produces
"""
__props__ = ('generator',)

def __init__(self, gen):
def __init__(self, gen, default=None):
super(GeneratorOp, self).__init__()
if not isinstance(gen, DataGenerator):
gen = DataGenerator(gen)
self.generator = gen
self.itypes = []
self.otypes = [self.generator.tensortype]
self._nan = np.zeros_like(self.generator.test_value)
self._nan[...] = np.nan
self.set_default(default)

def make_node(self, *inputs):
gen_var = self.generator.make_variable(self)
return theano.Apply(self, [], [gen_var])

def perform(self, node, inputs, output_storage, params=None):
try:
if self.default is not None:
output_storage[0][0] = next(self.generator, self.default)
else:
output_storage[0][0] = next(self.generator)
except StopIteration:
output_storage[0][0] = self._nan

def do_constant_folding(self, node):
return False

class _set_gen(object):
"""For pickling"""
def __init__(self, op):
self.op = op

def __call__(self, gen):
self.op.set_gen(gen)

@change_flags(compute_test_value='off')
def __call__(self, *args, **kwargs):
rval = super(GeneratorOp, self).__call__(*args, **kwargs)
rval.set_gen = self._set_gen(self)
rval.tag.test_value = self.generator.test_value
return rval
__call__ = change_flags(compute_test_value='off')(Op.__call__)

def set_gen(self, gen):
if not isinstance(gen, DataGenerator):
Expand All @@ -297,7 +291,37 @@ def set_gen(self, gen):
raise ValueError('New generator should yield the same type')
self.generator = gen

def set_default(self, value):
if value is None:
self.default = None
else:
value = np.asarray(value)
t1 = (value.dtype, ((False,) * value.ndim))
t2 = (self.generator.tensortype.dtype,
self.generator.tensortype.broadcastable)
if not t1 == t2:
raise ValueError('Default value should have the '
'same type as generator')
self.default = value


def generator(gen, default=None):
"""
Generator variable with possibility to set default value and new generator.
If the generator is exhausted, the variable will produce the default value if it is not None,
else it raises a `StopIteration` exception that can be caught at runtime.
Parameters
----------
gen : generator that implements __next__ (py3) or next (py2) method
and yields np.arrays with same types
default : np.array with the same type as generator produces
def generator(gen):
"""shortcut for `GeneratorOp`"""
return GeneratorOp(gen)()
Returns
-------
TensorVariable
It has 2 new methods (assigned externally)
- var.set_gen(gen) : sets new generator
- var.set_default(value) : sets new default value (None erases default value)
"""
return GeneratorOp(gen, default)()

0 comments on commit 8a64ba4

Please sign in to comment.