Merge 0f36f06 into e1384c2

ferrine committed Jan 20, 2017
2 parents e1384c2 + 0f36f06 commit 6c135fb80495118f6559e3033819530a605e7304
@@ -1,7 +1,10 @@
+import itertools
 import pkgutil
 import io
 
-__all__ = ['get_data_file']
+import theano.tensor as tt
+
+__all__ = ['get_data_file', 'DataGenerator']
 
 
 def get_data_file(pkg, path):
@@ -19,3 +22,30 @@ def get_data_file(pkg, path):
"""

return io.BytesIO(pkgutil.get_data(pkg, path))


class DataGenerator(object):
"""
Helper class that helps to infer data type of generator with looking
at the first item, preserving the order of the resulting generator
"""
def __init__(self, generator):
self.test_value = next(generator)
self.gen = itertools.chain([self.test_value], generator)
self.tensortype = tt.TensorType(
self.test_value.dtype,
((False, ) * self.test_value.ndim))

def __next__(self):
return next(self.gen)

next = __next__

def __iter__(self):
return self

def __eq__(self, other):
return id(self) == id(other)

def __hash__(self):
return hash(id(self))
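
For orientation, a minimal sketch of how the new helper behaves (the batch shape and the pymc3.data import path are illustrative assumptions, not part of the commit): it peeks at the first yielded array to build a matching Theano TensorType, then chains the peeked item back in front so iteration order is unchanged.

import numpy as np
from pymc3.data import DataGenerator  # assumed import path for this commit

def batches():
    while True:  # endless stream of (10, 3) float32 minibatches
        yield np.random.rand(10, 3).astype('float32')

gen = DataGenerator(batches())
print(gen.tensortype)  # TensorType(float32, matrix), inferred from the first batch
first = next(gen)      # the peeked batch is replayed first, so nothing is lost
assert first.shape == (10, 3)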
@@ -30,8 +30,9 @@ def __new__(cls, name, *args, **kwargs):

         if isinstance(name, string_types):
             data = kwargs.pop('observed', None)
+            total_size = kwargs.pop('total_size', None)
             dist = cls.dist(*args, **kwargs)
-            return model.Var(name, dist, data)
+            return model.Var(name, dist, data, total_size)
         else:
             raise TypeError("Name needs to be a string but got: %s" % name)
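
Because total_size is popped from kwargs before cls.dist is called, it never reaches the distribution constructor; it only travels on to model.Var. A hedged usage sketch (the data and the notional 10,000-row total are made up):

import numpy as np
import pymc3 as pm

minibatch = np.random.randn(100)  # 100 rows out of a notional 10,000

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=1)
    # total_size upscales this likelihood by coef = 10000 / 100 = 100
    pm.Normal('obs', mu=mu, sd=1, observed=minibatch, total_size=10000)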

@@ -20,7 +20,7 @@ def run(n_samples=3000):
     start = model.test_point
     h = pm.find_hessian(start, model=model)
     step = pm.Metropolis(model.vars, h, blocked=True, model=model)
-    trace = pm.sample(n_samples, step, start, model=model)
+    trace = pm.sample(n_samples, step=step, start=start, model=model)
     return trace
 
 if __name__ == "__main__":
@@ -13,7 +13,7 @@ def run(n=3000):
     if n == "short":
         n = 50
     with model:
-        trace = sample(n, step, start)
+        trace = sample(n, step=step, start=start)
 
 if __name__ == '__main__':
     run()
@@ -1,5 +1,6 @@
 import threading
 import six
+import types
 
 import numpy as np
 import theano
@@ -8,7 +9,7 @@

 import pymc3 as pm
 from .memoize import memoize
-from .theanof import gradient, hessian, inputvars
+from .theanof import gradient, hessian, inputvars, generator
 from .vartypes import typefilter, discrete_types, continuous_types
 from .blocking import DictToArrayBijection, ArrayOrdering

@@ -458,7 +459,7 @@ def cont_vars(self):
"""All the continuous variables in the model"""
return list(typefilter(self.vars, continuous_types))

def Var(self, name, dist, data=None):
def Var(self, name, dist, data=None, total_size=None):
"""Create and add (un)observed random variable to the model with an
appropriate prior distribution.
@@ -469,6 +470,8 @@ def Var(self, name, dist, data=None):
         data : array_like (optional)
            If data is provided, the variable is observed. If None,
            the variable is unobserved.
+        total_size : scalar
+            upscales logp of variable with :math:`coef = total_size/var.shape[0]`
 
         Returns
         -------
@@ -477,11 +480,13 @@ def Var(self, name, dist, data=None):
         name = self.name_for(name)
         if data is None:
             if getattr(dist, "transform", None) is None:
-                var = FreeRV(name=name, distribution=dist, model=self)
+                var = FreeRV(name=name, distribution=dist, model=self,
+                             total_size=total_size)
                 self.free_RVs.append(var)
             else:
                 var = TransformedRV(name=name, distribution=dist, model=self,
-                                    transform=dist.transform)
+                                    transform=dist.transform,
+                                    total_size=total_size)
                 pm._log.debug('Applied {transform}-transform to {name}'
                               ' and added transformed {orig_name} to model.'.format(
                                   transform=dist.transform.name,
@@ -491,7 +496,7 @@ def Var(self, name, dist, data=None):
             return var
         elif isinstance(data, dict):
             var = MultiObservedRV(name=name, data=data, distribution=dist,
-                                  model=self)
+                                  model=self, total_size=total_size)
             self.observed_RVs.append(var)
             if var.missing_values:
                 self.free_RVs += var.missing_values
@@ -500,7 +505,8 @@ def Var(self, name, dist, data=None):
                     self.named_vars[v.name] = v
         else:
             var = ObservedRV(name=name, data=data,
-                             distribution=dist, model=self)
+                             distribution=dist, model=self,
+                             total_size=total_size)
             self.observed_RVs.append(var)
             if var.missing_values:
                 self.free_RVs.append(var.missing_values)
@@ -717,15 +723,18 @@ class FreeRV(Factor, TensorVariable):
"""Unobserved random variable that a model is specified in terms of."""

def __init__(self, type=None, owner=None, index=None, name=None,
distribution=None, model=None):
distribution=None, model=None, total_size=None):
"""
Parameters
----------
type : theano type (optional)
owner : theano owner (optional)
name : str
distribution : Distribution
model : Model"""
model : Model
total_size : scalar Tensor (optional)
needed for upscaling logp
"""
if type is None:
type = distribution.type
super(FreeRV, self).__init__(type, owner, index, name)
@@ -736,7 +745,14 @@ def __init__(self, type=None, owner=None, index=None, name=None,
         self.distribution = distribution
         self.tag.test_value = np.ones(
             distribution.shape, distribution.dtype) * distribution.default()
-        self.logp_elemwiset = distribution.logp(self)
+        logp_elemwiset = distribution.logp(self)
+        if total_size is None:
+            coef = tt.constant(1)
+        else:
+            assert logp_elemwiset.ndim >= 1, ('Variable with scaled density '
+                                              'needs to be at least 1 dimensional')
+            coef = tt.as_tensor(total_size) / logp_elemwiset.shape[0]
+        self.logp_elemwiset = logp_elemwiset * coef
         self.model = model
 
         incorporate_methods(source=distribution, destination=self,
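
The scaling itself is plain elementwise arithmetic: with total_size = N and a minibatch of n rows, each per-row log-density is multiplied by N / n, so the summed logp becomes an estimate of the full-data logp. A small numeric check mirroring the Theano expression above (values are illustrative):

import numpy as np

total_size, batch = 10000, 100
logp_elemwise = np.full(batch, -1.5)   # pretend per-row log-densities
coef = total_size / batch              # 100.0, like tt.as_tensor(total_size) / shape[0]
scaled = (logp_elemwise * coef).sum()  # -15000.0, the upscaled minibatch logp
assert np.isclose(scaled, total_size * -1.5)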
@@ -759,6 +775,9 @@ def pandas_to_array(data):
         return data
     elif isinstance(data, theano.gof.graph.Variable):
         return data
+    elif (hasattr(data, '__next__') or
+          isinstance(data, types.GeneratorType)):
+        return generator(data)
     else:
         return np.asarray(data)
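
Observed data is funneled through pandas_to_array (via as_tensor), so after this change a plain Python generator can be handed in as observed and is wrapped by the generator op imported from .theanof above. A sketch of the intended combination with total_size (batch size and total are assumptions):

import numpy as np
import pymc3 as pm

def minibatches():
    while True:  # endless stream of 100-row batches
        yield np.random.randn(100)

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=1)
    # routed through pandas_to_array -> generator(...); total_size rescales
    # the minibatch logp back to full-data scale
    pm.Normal('obs', mu=mu, sd=1, observed=minibatches(), total_size=10000)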

@@ -792,7 +811,7 @@ class ObservedRV(Factor, TensorVariable):
"""

def __init__(self, type=None, owner=None, index=None, name=None, data=None,
distribution=None, model=None):
distribution=None, model=None, total_size=None):
"""
Parameters
----------
@@ -801,6 +820,8 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None,
         name : str
         distribution : Distribution
         model : Model
+        total_size : scalar Tensor (optional)
+            needed for upscaling logp
         """
         from .distributions import TensorType
         if type is None:
@@ -814,7 +835,14 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None,
             data = as_tensor(data, name, model, distribution)
             self.missing_values = data.missing_values
 
-        self.logp_elemwiset = distribution.logp(data)
+        logp_elemwiset = distribution.logp(data)
+        if total_size is None:
+            coef = tt.constant(1)
+        else:
+            assert logp_elemwiset.ndim >= 1, ('Variable with scaled density '
+                                              'needs to be at least 1 dimensional')
+            coef = tt.as_tensor(total_size) / logp_elemwiset.shape[0]
+        self.logp_elemwiset = logp_elemwiset * coef
         self.model = model
         self.distribution = distribution

@@ -835,7 +863,7 @@ class MultiObservedRV(Factor):
     Potentially partially observed.
     """
 
-    def __init__(self, name, data, distribution, model):
+    def __init__(self, name, data, distribution, model, total_size=None):
         """
         Parameters
         ----------
@@ -844,14 +872,23 @@ def __init__(self, name, data, distribution, model):
         name : str
         distribution : Distribution
         model : Model
+        total_size : scalar Tensor (optional)
+            needed for upscaling logp
         """
         self.name = name
         self.data = {name: as_tensor(data, name, model, distribution)
                      for name, data in data.items()}
 
         self.missing_values = [datum.missing_values for datum in self.data.values()
                                if datum.missing_values is not None]
-        self.logp_elemwiset = distribution.logp(**self.data)
+        logp_elemwiset = distribution.logp(**self.data)
+        if total_size is None:
+            coef = tt.constant(1)
+        else:
+            assert logp_elemwiset.ndim >= 1, ('Variable with scaled density '
+                                              'needs to be at least 1 dimensional')
+            coef = tt.as_tensor(total_size) / logp_elemwiset.shape[0]
+        self.logp_elemwiset = logp_elemwiset * coef
         self.model = model
         self.distribution = distribution

@@ -896,17 +933,20 @@ def Potential(name, var, model=None):
 class TransformedRV(TensorVariable):
 
     def __init__(self, type=None, owner=None, index=None, name=None,
-                 distribution=None, model=None, transform=None):
+                 distribution=None, model=None, transform=None,
+                 total_size=None):
         """
         Parameters
         ----------
         type : theano type (optional)
         owner : theano owner (optional)
         name : str
         distribution : Distribution
-        model : Model"""
+        model : Model
+        total_size : scalar Tensor (optional)
+            needed for upscaling logp
+        """
         if type is None:
             type = distribution.type
         super(TransformedRV, self).__init__(type, owner, index, name)
@@ -916,7 +956,7 @@ def __init__(self, type=None, owner=None, index=None, name=None,

         transformed_name = "{}_{}_".format(name, transform.name)
         self.transformed = model.Var(
-            transformed_name, transform.apply(distribution))
+            transformed_name, transform.apply(distribution), total_size=total_size)
 
         normalRV = transform.backward(self.transformed)
