
Commit 7b6acf7: Merge 91011b5 into 35c6c44

ferrine committed Jan 19, 2017
2 parents 35c6c44 + 91011b5
Showing 6 changed files with 224 additions and 21 deletions. (Five files appear below; the sixth, pymc3/theanof.py, which the new imports and tests reference, was not captured in this view.)
30 changes: 29 additions & 1 deletion pymc3/data.py
@@ -1,7 +1,10 @@
+import itertools
 import pkgutil
 import io
 
-__all__ = ['get_data_file']
+import theano.tensor as tt
+
+__all__ = ['get_data_file', 'DataGenerator']
 
 
 def get_data_file(pkg, path):
@@ -19,3 +22,28 @@ def get_data_file(pkg, path):
     """
 
     return io.BytesIO(pkgutil.get_data(pkg, path))
+
+
+class DataGenerator(object):
+    """
+    Helper class that infers the data type of a generator by looking at
+    its first item, while preserving the order of the resulting generator
+    """
+    def __init__(self, generator):
+        self.test_value = next(generator)
+        self.gen = itertools.chain([self.test_value], generator)
+        self.tensortype = tt.TensorType(
+            self.test_value.dtype,
+            ((False, ) * self.test_value.ndim))
+
+    def __next__(self):
+        return next(self.gen)
+
+    def __iter__(self):
+        return self
+
+    def __eq__(self, other):
+        return id(self) == id(other)
+
+    def __hash__(self):
+        return hash(id(self))
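
A minimal usage sketch (illustrative, not part of the commit), assuming the import path this diff adds to pymc3/data.py (the tests below import an equivalent from pymc3.theanof):

import numpy as np
from pymc3.data import DataGenerator

def batches():
    while True:
        yield np.zeros((10, 5))

gen = DataGenerator(batches())
print(gen.tensortype)    # TensorType(float64, matrix) -- inferred from the first item
print(next(gen).shape)   # (10, 5) -- the probed first item is replayed, order preserved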
3 changes: 2 additions & 1 deletion pymc3/distributions/distribution.py
@@ -30,8 +30,9 @@ def __new__(cls, name, *args, **kwargs):
 
         if isinstance(name, string_types):
             data = kwargs.pop('observed', None)
+            total_size = kwargs.pop('total_size', None)
             dist = cls.dist(*args, **kwargs)
-            return model.Var(name, dist, data)
+            return model.Var(name, dist, data, total_size)
         else:
             raise TypeError("Name needs to be a string but got: %s" % name)
 
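With this change, any Distribution accepts a total_size keyword alongside observed. A hedged sketch of the intended minibatch use (names and sizes are illustrative, not from the commit):

import numpy as np
import pymc3 as pm

batch = np.random.randn(10)   # a minibatch of 10 rows
N = 1000                      # assumed size of the full dataset

with pm.Model() as model:
    mu = pm.Normal('mu', mu=0, sd=1)
    # total_size rescales the observed logp by N / batch.shape[0],
    # approximating the full-data likelihood from the minibatch.
    pm.Normal('x', mu=mu, sd=1, observed=batch, total_size=N)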
72 changes: 55 additions & 17 deletions pymc3/model.py
@@ -8,7 +8,7 @@
 
 import pymc3 as pm
 from .memoize import memoize
-from .theanof import gradient, hessian, inputvars
+from .theanof import gradient, hessian, inputvars, generator
 from .vartypes import typefilter, discrete_types, continuous_types
 from .blocking import DictToArrayBijection, ArrayOrdering
 
@@ -458,7 +458,7 @@ def cont_vars(self):
         """All the continuous variables in the model"""
         return list(typefilter(self.vars, continuous_types))
 
-    def Var(self, name, dist, data=None):
+    def Var(self, name, dist, data=None, total_size=None):
         """Create and add (un)observed random variable to the model with an
         appropriate prior distribution.
 
@@ -469,6 +469,8 @@ def Var(self, name, dist, data=None):
         data : array_like (optional)
             If data is provided, the variable is observed. If None,
             the variable is unobserved.
+        total_size : scalar
+            scales the variable's logp by :math:`coef = total_size/var.shape[0]`
 
         Returns
         -------
@@ -477,11 +479,13 @@ def Var(self, name, dist, data=None):
         name = self.name_for(name)
         if data is None:
             if getattr(dist, "transform", None) is None:
-                var = FreeRV(name=name, distribution=dist, model=self)
+                var = FreeRV(name=name, distribution=dist, model=self,
+                             total_size=total_size)
                 self.free_RVs.append(var)
             else:
                 var = TransformedRV(name=name, distribution=dist, model=self,
-                                    transform=dist.transform)
+                                    transform=dist.transform,
+                                    total_size=total_size)
                 pm._log.debug('Applied {transform}-transform to {name}'
                               ' and added transformed {orig_name} to model.'.format(
                                   transform=dist.transform.name,
@@ -491,7 +495,7 @@ def Var(self, name, dist, data=None):
             return var
         elif isinstance(data, dict):
             var = MultiObservedRV(name=name, data=data, distribution=dist,
-                                  model=self)
+                                  model=self, total_size=total_size)
             self.observed_RVs.append(var)
             if var.missing_values:
                 self.free_RVs += var.missing_values
@@ -500,7 +504,8 @@ def Var(self, name, dist, data=None):
                 self.named_vars[v.name] = v
         else:
             var = ObservedRV(name=name, data=data,
-                             distribution=dist, model=self)
+                             distribution=dist, model=self,
+                             total_size=total_size)
            self.observed_RVs.append(var)
            if var.missing_values:
                self.free_RVs.append(var.missing_values)
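
The docstring's coefficient is easy to check numerically; a small sketch (illustrative, outside the diff) of the scaling that total_size induces:

import numpy as np
import theano
import theano.tensor as tt

logp = tt.vector('logp')                   # per-row log-densities of a minibatch
coef = tt.as_tensor(1000) / logp.shape[0]  # total_size = 1000 (assumed)
scaled_sum = (logp * coef).sum()

f = theano.function([logp], scaled_sum)
print(f(np.ones(10)))                      # 1000.0 == (1000 / 10) * 10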
@@ -717,15 +722,18 @@ class FreeRV(Factor, TensorVariable):
     """Unobserved random variable that a model is specified in terms of."""
 
     def __init__(self, type=None, owner=None, index=None, name=None,
-                 distribution=None, model=None):
+                 distribution=None, model=None, total_size=None):
         """
         Parameters
         ----------
         type : theano type (optional)
         owner : theano owner (optional)
         name : str
         distribution : Distribution
-        model : Model"""
+        model : Model
+        total_size : scalar Tensor (optional)
+            needed to rescale logp to the full dataset size
+        """
         if type is None:
             type = distribution.type
         super(FreeRV, self).__init__(type, owner, index, name)
@@ -736,7 +744,14 @@ def __init__(self, type=None, owner=None, index=None, name=None,
         self.distribution = distribution
         self.tag.test_value = np.ones(
             distribution.shape, distribution.dtype) * distribution.default()
-        self.logp_elemwiset = distribution.logp(self)
+        logp_elemwiset = distribution.logp(self)
+        if total_size is None:
+            coef = tt.constant(1)
+        else:
+            assert logp_elemwiset.ndim >= 1, ('Variable with scaled density '
+                                              'needs to be at least 1 dimensional')
+            coef = tt.as_tensor(total_size) / logp_elemwiset.shape[0]
+        self.logp_elemwiset = logp_elemwiset * coef
         self.model = model
 
         incorporate_methods(source=distribution, destination=self,
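
The assert above rejects scalar variables, since there is no batch dimension to rescale. A hedged sketch of both sides of that guard (shapes and names are illustrative):

import pymc3 as pm

with pm.Model():
    try:
        pm.Normal('x', mu=0, sd=1, total_size=10)          # scalar logp, ndim 0
    except AssertionError as err:
        print(err)                 # 'Variable with scaled density needs to be at least 1 dimensional'
    pm.Normal('y', mu=0, sd=1, shape=5, total_size=10)     # ndim 1 -> coef = 10/5 = 2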
@@ -759,6 +774,8 @@ def pandas_to_array(data):
         return data
     elif isinstance(data, theano.gof.graph.Variable):
         return data
+    elif hasattr(data, '__next__'):
+        return generator(data)
     else:
         return np.asarray(data)
 
@@ -792,7 +809,7 @@ class ObservedRV(Factor, TensorVariable):
     """
 
     def __init__(self, type=None, owner=None, index=None, name=None, data=None,
-                 distribution=None, model=None):
+                 distribution=None, model=None, total_size=None):
         """
         Parameters
         ----------
@@ -801,6 +818,8 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None,
         name : str
         distribution : Distribution
         model : Model
+        total_size : scalar Tensor (optional)
+            needed to rescale logp to the full dataset size
         """
         from .distributions import TensorType
         if type is None:
@@ -814,7 +833,14 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None,
         data = as_tensor(data, name, model, distribution)
         self.missing_values = data.missing_values
 
-        self.logp_elemwiset = distribution.logp(data)
+        logp_elemwiset = distribution.logp(data)
+        if total_size is None:
+            coef = tt.constant(1)
+        else:
+            assert logp_elemwiset.ndim >= 1, ('Variable with scaled density '
+                                              'needs to be at least 1 dimensional')
+            coef = tt.as_tensor(total_size) / logp_elemwiset.shape[0]
+        self.logp_elemwiset = logp_elemwiset * coef
         self.model = model
         self.distribution = distribution
 
@@ -835,7 +861,7 @@ class MultiObservedRV(Factor):
     Potentially partially observed.
     """
 
-    def __init__(self, name, data, distribution, model):
+    def __init__(self, name, data, distribution, model, total_size=None):
         """
         Parameters
         ----------
@@ -844,14 +870,23 @@ def __init__(self, name, data, distribution, model):
         name : str
         distribution : Distribution
         model : Model
+        total_size : scalar Tensor (optional)
+            needed to rescale logp to the full dataset size
         """
         self.name = name
         self.data = {name: as_tensor(data, name, model, distribution)
                      for name, data in data.items()}
 
         self.missing_values = [datum.missing_values for datum in self.data.values()
                                if datum.missing_values is not None]
-        self.logp_elemwiset = distribution.logp(**self.data)
+        logp_elemwiset = distribution.logp(**self.data)
+        if total_size is None:
+            coef = tt.constant(1)
+        else:
+            assert logp_elemwiset.ndim >= 1, ('Variable with scaled density '
+                                              'needs to be at least 1 dimensional')
+            coef = tt.as_tensor(total_size) / logp_elemwiset.shape[0]
+        self.logp_elemwiset = logp_elemwiset * coef
         self.model = model
         self.distribution = distribution
 
@@ -896,17 +931,20 @@ def Potential(name, var, model=None):
 
 
 class TransformedRV(TensorVariable):
 
     def __init__(self, type=None, owner=None, index=None, name=None,
-                 distribution=None, model=None, transform=None):
+                 distribution=None, model=None, transform=None,
+                 total_size=None):
         """
         Parameters
         ----------
         type : theano type (optional)
         owner : theano owner (optional)
         name : str
         distribution : Distribution
-        model : Model"""
+        model : Model
+        total_size : scalar Tensor (optional)
+            needed to rescale logp to the full dataset size
+        """
         if type is None:
             type = distribution.type
         super(TransformedRV, self).__init__(type, owner, index, name)
@@ -916,7 +954,7 @@ def __init__(self, type=None, owner=None, index=None, name=None,
 
         transformed_name = "{}_{}_".format(name, transform.name)
         self.transformed = model.Var(
-            transformed_name, transform.apply(distribution))
+            transformed_name, transform.apply(distribution), total_size=total_size)
 
         normalRV = transform.backward(self.transformed)
 
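total_size also propagates through TransformedRV to the underlying free variable, so priors over per-row (local) parameters can be rescaled the same way. A hedged sketch, with an illustrative shape:

import pymc3 as pm

with pm.Model():
    # HalfCauchy is log-transformed internally; the transformed free RV
    # receives total_size and scales its logp by 100 / 10 = 10.
    pm.HalfCauchy('sd', beta=1, shape=10, total_size=100)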
41 changes: 40 additions & 1 deletion pymc3/tests/test_model.py
@@ -1,5 +1,6 @@
 import unittest
-import theano.tensor as tt
+from theano import theano, tensor as tt
+import numpy as np
 import pymc3 as pm
 from pymc3.distributions import HalfCauchy, Normal
 from pymc3 import Potential, Deterministic
@@ -118,3 +119,41 @@ def test_model_root(self):
         self.assertTrue(model is model.root)
         with pm.Model() as sub:
             self.assertTrue(model is sub.root)
+
+    def test_density_scaling(self):
+        with pm.Model() as model1:
+            Normal('n', observed=[[1]], total_size=1)
+            p1 = theano.function([], model1.logpt)
+
+        with pm.Model() as model2:
+            Normal('n', observed=[[1]], total_size=2)
+            p2 = theano.function([], model2.logpt)
+        self.assertEqual(p1() * 2, p2())
+
+    def test_density_scaling_with_generator(self):
+        # Generators that yield batches of different sizes
+        def gen1():
+            i = 0
+            while True:
+                yield np.ones((10, 100)) * i
+                i += 1
+
+        def gen2():
+            i = 0
+            while True:
+                yield np.ones((20, 100)) * i
+                i += 1
+
+        # ... but both models declare the same total_size
+        with pm.Model() as model1:
+            Normal('n', observed=gen1(), total_size=100)
+            p1 = theano.function([], model1.logpt)
+
+        with pm.Model() as model2:
+            Normal('n', observed=gen2(), total_size=100)
+            p2 = theano.function([], model2.logpt)
+
+        # The rescaled densities should therefore be equal
+        for _ in range(10):
+            np.testing.assert_almost_equal(p1(), p2())
+        # Done
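
Why these equalities hold, as a quick check (illustrative, outside the diff): the elementwise logp terms are identical across each pair of models, so only the coefficient and the number of terms matter.

# test_density_scaling: one observed row, so coef = total_size / 1
assert 2 / 1 == 2                                           # model2's logp is twice model1's

# generator test: coef * number_of_terms matches across batch sizes
assert (100 / 10) * (10 * 100) == (100 / 20) * (20 * 100)   # 10000 == 10000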
50 changes: 50 additions & 0 deletions pymc3/tests/test_theanof.py
@@ -0,0 +1,50 @@
+import itertools
+import unittest
+import numpy as np
+import theano
+from ..theanof import DataGenerator, GeneratorOp, generator
+
+
+def integers():
+    i = 0
+    while True:
+        yield np.float32(i)
+        i += 1
+
+
+def integers_ndim(ndim):
+    i = 0
+    while True:
+        yield np.ones((2,) * ndim) * i
+        i += 1
+
+
+class TestGenerator(unittest.TestCase):
+    def test_basic(self):
+        generator = DataGenerator(integers())
+        gop = GeneratorOp(generator)()
+        self.assertEqual(gop.tag.test_value, np.float32(0))
+        f = theano.function([], gop)
+        self.assertEqual(f(), np.float32(0))
+        self.assertEqual(f(), np.float32(1))
+        for i in range(2, 100):
+            f()
+        self.assertEqual(f(), np.float32(100))
+
+    def test_ndim(self):
+        for ndim in range(10):
+            res = list(itertools.islice(integers_ndim(ndim), 0, 2))
+            generator = DataGenerator(integers_ndim(ndim))
+            gop = GeneratorOp(generator)()
+            f = theano.function([], gop)
+            self.assertEqual(ndim, res[0].ndim)
+            np.testing.assert_equal(f(), res[0])
+            np.testing.assert_equal(f(), res[1])
+
+    def test_cloning_available(self):
+        gop = generator(integers())
+        res = gop ** 2
+        shared = theano.shared(np.float32(10))
+        res1 = theano.clone(res, {gop: shared})
+        f = theano.function([], res1)
+        self.assertEqual(f(), np.float32(100))

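The cloning test is what makes generator-backed inputs swappable: the same graph can be re-bound to fixed data. A hedged sketch (illustrative, outside the diff):

import numpy as np
import theano
from pymc3.theanof import generator

def integers():
    i = 0
    while True:
        yield np.float32(i)
        i += 1

g = generator(integers())                    # minibatch-style symbolic input
expr = g * 2.0
fixed = theano.shared(np.float32(21))
expr_fixed = theano.clone(expr, {g: fixed})  # evaluate on fixed data instead
print(theano.function([], expr_fixed)())     # 42.0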