diff --git a/.travis.yml b/.travis.yml
index c6f6d3c07a6..432216fb3ae 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,14 +22,14 @@ install:
   - conda list && pip freeze

 env:
-  - FLOATX='float32' TESTCMD="--durations=10 --ignore=pymc3/tests/test_examples.py --cov-append --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py"
+  - FLOATX='float32' TESTCMD="--durations=10 --ignore=pymc3/tests/test_examples.py --cov-append --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py --ignore=pymc3/tests/test_shape_handling.py"
   - FLOATX='float32' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py pymc3/tests/test_sampling.py pymc3/tests/test_parallel_sampling.py pymc3/tests/test_dist_math.py pymc3/tests/test_distribution_defaults.py pymc3/tests/test_distributions_timeseries.py pymc3/tests/test_random.py"
   - FLOATX='float32' TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_posteriors.py pymc3/tests/test_gp.py"
-  - FLOATX='float32' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
-  - FLOATX='float64' TESTCMD="--durations=10 --cov-append --ignore=pymc3/tests/test_examples.py --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py"
+  - FLOATX='float32' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py pymc3/tests/test_shape_handling.py"
+  - FLOATX='float64' TESTCMD="--durations=10 --cov-append --ignore=pymc3/tests/test_examples.py --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py --ignore=pymc3/tests/test_shape_handling.py"
   - FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py pymc3/tests/test_sampling.py pymc3/tests/test_parallel_sampling.py pymc3/tests/test_dist_math.py pymc3/tests/test_distribution_defaults.py pymc3/tests/test_distributions_timeseries.py pymc3/tests/test_random.py"
   - FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_posteriors.py pymc3/tests/test_gp.py"
-  - FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
+  - FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py pymc3/tests/test_shape_handling.py"

 script:
   - . ./scripts/test.sh $TESTCMD

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 2a2afc01bc2..83e35401978 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -8,6 +8,7 @@
 - Add function `set_data` to update variables defined as `Data`.
 - `Mixture` now supports mixtures of multidimensional probability distributions, not just lists of 1D distributions.
 - `GLM.from_formula` and `LinearComponent.from_formula` can extract variables from the calling scope. Customizable via the new `eval_env` argument. Fixing #3382.
+- Added the `distributions.shape_utils` module with functions used to help broadcast samples drawn from distributions using the `size` keyword argument.

 ### Maintenance
 - All occurances of `sd` as a parameter name have been renamed to `sigma`. `sd` will continue to function for backwards compatibility.
@@ -31,6 +32,8 @@
 - Add `sigma`, `tau`, and `sd` to signature of `NormalMixture`.
 - Resolved issue #3248. Set default lower and upper values of -inf and inf for pm.distributions.continuous.TruncatedNormal. This avoids errors caused by their previous values of None.
 - Resolved issue #3399. Converted all calls to `pm.distributions.bound._ContinuousBounded` and `pm.distributions.bound._DiscreteBounded` to use only and all positional arguments.
+- Restructured `distributions.distribution.generate_samples` to use the `shape_utils` module. This solves issues #3421 and #3147 by using the `size`-aware broadcasting functions in `shape_utils`.
+- Fixed the `Multinomial.random` and `Multinomial._random` methods to make them compatible with the new `generate_samples` function. In the process, a bug in the `Multinomial._random` shape handling was discovered and fixed.

 ### Deprecations

diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py
index 39da708698d..cffc3cc819b 100644
--- a/pymc3/distributions/bound.py
+++ b/pymc3/distributions/bound.py
@@ -59,7 +59,7 @@ def _random(self, lower, upper, point=None, size=None):
                 "Drawing samples from distributions with "
                 "array-valued bounds is not supported."
             )
-        total_size = np.prod(size)
+        total_size = np.prod(size).astype(np.int)
         samples = []
         s = 0
         while s < total_size:
@@ -81,17 +81,32 @@ def random(self, point=None, size=None):
         elif self.lower is not None and self.upper is not None:
             lower, upper = draw_values([self.lower, self.upper], point=point, size=size)
             return generate_samples(
-                self._random, lower, upper, point, dist_shape=self.shape, size=size
+                self._random,
+                lower,
+                upper,
+                dist_shape=self.shape,
+                size=size,
+                not_broadcast_kwargs={'point': point},
             )
         elif self.lower is not None:
             lower = draw_values([self.lower], point=point, size=size)
             return generate_samples(
-                self._random, lower, np.inf, point, dist_shape=self.shape, size=size
+                self._random,
+                lower,
+                np.inf,
+                dist_shape=self.shape,
+                size=size,
+                not_broadcast_kwargs={'point': point},
            )
         else:
             upper = draw_values([self.upper], point=point, size=size)
             return generate_samples(
-                self._random, -np.inf, upper, point, dist_shape=self.shape, size=size
+                self._random,
+                -np.inf,
+                upper,
+                dist_shape=self.shape,
+                size=size,
+                not_broadcast_kwargs={'point': point},
             )
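
# Why `point` moves into `not_broadcast_kwargs` above: generate_samples
# broadcasts its positional parameters against each other and dist_shape,
# and a `point` dict cannot be broadcast like an ndarray. A minimal sketch,
# assuming a toy generator in place of Bound._random:
import numpy as np
from pymc3.distributions.distribution import generate_samples

def _toy_random(lower, upper, point=None, size=None):
    # lower/upper arrive broadcast to arrays; point arrives untouched
    return np.random.uniform(lower, upper, size=size)

draws = generate_samples(_toy_random, 0., 1., dist_shape=(3,), size=10,
                         not_broadcast_kwargs={'point': None})
assert draws.shape == (10, 3)
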
diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
index 0901412b3ba..0e2b7a204a2 100644
--- a/pymc3/distributions/continuous.py
+++ b/pymc3/distributions/continuous.py
@@ -19,8 +19,7 @@
     alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow,
     normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue,
 )
-from .distribution import (Continuous, draw_values, generate_samples,
-                           broadcast_distribution_samples)
+from .distribution import (Continuous, draw_values, generate_samples)

 __all__ = ['Uniform', 'Flat', 'HalfFlat', 'Normal', 'TruncatedNormal', 'Beta',
            'Kumaraswamy', 'Exponential', 'Laplace', 'StudentT', 'Cauchy',
@@ -966,8 +965,6 @@ def random(self, point=None, size=None):
         """
         mu, lam, alpha = draw_values([self.mu, self.lam, self.alpha],
                                      point=point, size=size)
-        mu, lam, alpha = broadcast_distribution_samples([mu, lam, alpha],
-                                                        size=size)
         return generate_samples(self._random, mu, lam, alpha,
                                 dist_shape=self.shape,
                                 size=size)
@@ -1297,7 +1294,6 @@ def random(self, point=None, size=None):
         """
         a, b = draw_values([self.a, self.b], point=point, size=size)
-        a, b = broadcast_distribution_samples([a, b], size=size)
         return generate_samples(self._random, a, b,
                                 dist_shape=self.shape,
                                 size=size)
@@ -1674,7 +1670,6 @@ def random(self, point=None, size=None):
         array
         """
         mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
-        mu, tau = broadcast_distribution_samples([mu, tau], size=size)
         return generate_samples(self._random, mu, tau,
                                 dist_shape=self.shape,
                                 size=size)
@@ -1965,7 +1960,6 @@ def random(self, point=None, size=None):
         """
         alpha, m = draw_values([self.alpha, self.m],
                                point=point, size=size)
-        alpha, m = broadcast_distribution_samples([alpha, m], size=size)
         return generate_samples(self._random, alpha, m,
                                 dist_shape=self.shape,
                                 size=size)
@@ -2090,7 +2084,6 @@ def random(self, point=None, size=None):
         """
         alpha, beta = draw_values([self.alpha, self.beta],
                                   point=point, size=size)
-        alpha, beta = broadcast_distribution_samples([alpha, beta], size=size)
         return generate_samples(self._random, alpha, beta,
                                 dist_shape=self.shape,
                                 size=size)
@@ -2669,7 +2662,6 @@ def random(self, point=None, size=None):
         """
         alpha, beta = draw_values([self.alpha, self.beta],
                                   point=point, size=size)
-        alpha, beta = broadcast_distribution_samples([alpha, beta], size=size)

         def _random(a, b, size=None):
             return b * (-np.log(np.random.uniform(size=size)))**(1 / a)
@@ -2963,8 +2955,6 @@ def random(self, point=None, size=None):
         """
         mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu],
                                     point=point, size=size)
-        mu, sigma, nu = broadcast_distribution_samples([mu, sigma, nu],
-                                                       size=size)

         def _random(mu, sigma, nu, size=None):
             return (np.random.normal(mu, sigma, size=size)
@@ -3369,7 +3359,7 @@ def random(self, point=None, size=None):
         scale = upper - lower
         c_ = (c - lower) / scale
         return generate_samples(stats.triang.rvs, c=c_, loc=lower, scale=scale,
-                                size=size, dist_shape=self.shape, random_state=None)
+                                size=size, dist_shape=self.shape)

     def logp(self, value):
         """

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index a6fd0f5f7d5..b431c234581 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -5,8 +5,8 @@
 from pymc3.util import get_variable_name
 from .dist_math import bound, factln, binomln, betaln, logpow, random_choice
-from .distribution import (Discrete, draw_values, generate_samples,
-                           broadcast_distribution_samples)
+from .distribution import Discrete, draw_values, generate_samples
+from .shape_utils import broadcast_distribution_samples
 from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp
 from ..theanof import floatX, intX
@@ -351,7 +351,6 @@ def _random(self, q, beta, size=None):

     def random(self, point=None, size=None):
         q, beta = draw_values([self.q, self.beta], point=point, size=size)
-        q, beta = broadcast_distribution_samples([q, beta], size=size)
         return generate_samples(self._random, q, beta,
                                 dist_shape=self.shape,
                                 size=size)
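
# The pattern behind the deletions above, illustrated with the helper that
# replaced the manual pre-broadcast (a sketch; the shapes are arbitrary):
import numpy as np
from pymc3.distributions.shape_utils import broadcast_distribution_samples

size = 10
q = np.random.rand(size)     # one draw per requested sample
beta = np.random.rand(4)     # a fixed, length-4 parameter
q_b, beta_b = broadcast_distribution_samples([q, beta], size=size)
assert q_b.shape == beta_b.shape == (size, 4)
# generate_samples now performs this size-aware broadcast internally, so
# the explicit calls in each `random` method became redundant.
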
diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index ff776b0d32d..88869012b7d 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -11,6 +11,10 @@
 )
 from ..vartypes import string_types
 from .dist_math import to_tuple
+from .shape_utils import (
+    get_broadcastable_dist_samples,
+    broadcast_dist_samples_shape,
+)

 __all__ = ['DensityDist', 'Distribution', 'Continuous', 'Discrete',
            'NoDistribution', 'TensorType', 'draw_values', 'generate_samples']
@@ -286,7 +290,6 @@ def draw_values(params, point=None, size=None):
         a) are named parameters in the point
         b) are *RVs with a random method
-
     """
     # Get fast drawable values (i.e. things in point or numbers, arrays,
    # constants or shares, or things that were already drawn in related
@@ -602,160 +605,83 @@ def generate_samples(generator, *args, **kwargs):
     not_broadcast_kwargs = kwargs.pop('not_broadcast_kwargs', None)
     if not_broadcast_kwargs is None:
         not_broadcast_kwargs = dict()
-    if size is None:
-        size = 1
+    # Parse out raw input parameters for the generator
     args = tuple(p[0] if isinstance(p, tuple) else p for p in args)
-
     for key in kwargs:
         p = kwargs[key]
         kwargs[key] = p[0] if isinstance(p, tuple) else p
+
+    # Convert size and dist_shape to tuples
+    size_tup = to_tuple(size)
+    dist_shape = to_tuple(dist_shape)
+
     if broadcast_shape is None:
+        # If broadcast_shape is not explicitly provided, it is inferred as the
+        # broadcasted shape of the input parameters and dist_shape, taking into
+        # account the potential size prefix
         inputs = args + tuple(kwargs.values())
-        try:
-            broadcast_shape = np.broadcast(*inputs).shape  # size of generator(size=1)
-        except ValueError:  # sometimes happens if args have shape (500,) and (500, 4)
-            max_dims = max(j.ndim for j in args + tuple(kwargs.values()))
-            args = tuple([j.reshape(j.shape + (1,) * (max_dims - j.ndim)) for j in args])
-            kwargs = {k: v.reshape(v.shape + (1,) * (max_dims - v.ndim)) for k, v in kwargs.items()}
-            inputs = args + tuple(kwargs.values())
-            broadcast_shape = np.broadcast(*inputs).shape  # size of generator(size=1)
+        broadcast_shape = broadcast_dist_samples_shape(
+            [np.asarray(i).shape for i in inputs] + [dist_shape],
+            size=size_tup
+        )
+        # We do this instead of broadcast_distribution_samples to avoid
+        # creating a dummy array with dist_shape in memory
+        inputs = get_broadcastable_dist_samples(
+            inputs,
+            size=size_tup,
+            must_bcast_with=broadcast_shape,
+        )
+        # We modify the arguments with their broadcasted counterparts
+        args = tuple(inputs[:len(args)])
+        for offset, key in enumerate(kwargs):
+            kwargs[key] = inputs[len(args) + offset]
     # Update kwargs with the keyword arguments that were not broadcasted
     kwargs.update(not_broadcast_kwargs)
-    dist_shape = to_tuple(dist_shape)
+    # We ensure that broadcast_shape is a tuple
     broadcast_shape = to_tuple(broadcast_shape)
-    size_tup = to_tuple(size)
-
-    # All inputs are scalars, end up size (size_tup, dist_shape)
-    if broadcast_shape in {(), (0,), (1,)}:
-        samples = generator(size=size_tup + dist_shape, *args, **kwargs)
-    # Inputs already have the right shape. Just get the right size.
-    elif broadcast_shape[-len(dist_shape):] == dist_shape or len(dist_shape) == 0:
-        if size == 1 or (broadcast_shape == size_tup + dist_shape):
-            samples = generator(size=broadcast_shape, *args, **kwargs)
-        elif dist_shape == broadcast_shape:
-            samples = generator(size=size_tup + dist_shape, *args, **kwargs)
-        elif len(dist_shape) == 0 and size_tup and broadcast_shape:
-            # There is no dist_shape (scalar distribution) but the parameters
-            # broadcast shape and size_tup determine the size to provide to
-            # the generator
-            if broadcast_shape[:len(size_tup)] == size_tup:
-                # Input's dist_shape is scalar, but it has size repetitions.
-                # So now the size matches but we have to manually broadcast to
-                # the right dist_shape
-                samples = [generator(*args, **kwargs)]
-                if samples[0].shape == broadcast_shape:
-                    samples = samples[0]
-                else:
-                    suffix = broadcast_shape[len(size_tup):] + dist_shape
-                    samples.extend([generator(*args, **kwargs).
-                                    reshape(broadcast_shape)[..., np.newaxis]
-                                    for _ in range(np.prod(suffix,
-                                                           dtype=int) - 1)])
-                    samples = np.hstack(samples).reshape(size_tup + suffix)
-            else:
-                # The parameter shape is given, but we have to concatenate it
-                # with the size tuple
-                samples = generator(size=size_tup + broadcast_shape,
-                                    *args,
-                                    **kwargs)
-        else:
-            samples = None
-    # Args have been broadcast correctly, can just ask for the right shape out
-    elif dist_shape[-len(broadcast_shape):] == broadcast_shape:
-        samples = generator(size=size_tup + dist_shape, *args, **kwargs)
-    # Inputs have the right size, have to manually broadcast to the right dist_shape
-    elif broadcast_shape[:len(size_tup)] == size_tup:
-        suffix = broadcast_shape[len(size_tup):] + dist_shape
-        samples = [generator(*args, **kwargs).reshape(size_tup + (1,)) for _ in range(np.prod(suffix, dtype=int))]
-        samples = np.hstack(samples).reshape(size_tup + suffix)
+    if dist_shape[:len(size_tup)] == size_tup:
+        # dist_shape is prepended with size_tup. This is not a consequence
+        # of the parameters being drawn size_tup times! By chance, the
+        # distribution's shape has its first elements equal to size_tup.
+        # This means that we must prepend the size_tup to dist_shape, and
+        # check if that broadcasts well with the parameters
+        _dist_shape = size_tup + dist_shape
     else:
-        samples = None
-
-    if samples is None:
+        _dist_shape = dist_shape
+    try:
+        dist_bcast_shape = broadcast_dist_samples_shape(
+            [_dist_shape, broadcast_shape],
+            size=size,
+        )
+    except (ValueError, TypeError):
         raise TypeError('''Attempted to generate values with incompatible shapes:
             size: {size}
             size_tup: {size_tup}
             broadcast_shape[:len(size_tup)] == size_tup: {test}
             dist_shape: {dist_shape}
             broadcast_shape: {broadcast_shape}
-        '''.format(size=size, size_tup=size_tup, dist_shape=dist_shape, broadcast_shape=broadcast_shape, test=broadcast_shape[:len(size_tup)] == size_tup))
+        '''.format(size=size,
+                   size_tup=size_tup,
+                   dist_shape=dist_shape,
+                   broadcast_shape=broadcast_shape,
+                   test=broadcast_shape[:len(size_tup)] == size_tup)
+        )
+    if dist_bcast_shape[:len(size_tup)] == size_tup:
+        samples = generator(size=dist_bcast_shape, *args, **kwargs)
+    else:
+        samples = generator(size=size_tup + dist_bcast_shape, *args, **kwargs)
+    samples = np.asarray(samples)

     # reshape samples here
-    if samples.shape[0] == 1 and size == 1:
-        if len(samples.shape) > len(dist_shape) and samples.shape[-len(dist_shape):] == dist_shape:
+    if samples.ndim > 0 and samples.shape[0] == 1 and size_tup == (1,):
+        if (len(samples.shape) > len(dist_shape) and
+            samples.shape[-len(dist_shape):] == dist_shape[-len(dist_shape):]
+        ):
             samples = samples.reshape(samples.shape[1:])
-    if one_d and samples.shape[-1] == 1:
+    if one_d and samples.ndim > 0 and samples.shape[-1] == 1:
         samples = samples.reshape(samples.shape[:-1])
-    return np.asarray(samples)
-
-
-def broadcast_distribution_samples(samples, size=None):
-    """Broadcast samples drawn from distributions taking into account the
-    size (i.e. the number of samples) of the draw, which is prepended to
-    the sample's shape.
-
-    Parameters
-    ----------
-    samples: Iterable of ndarrays holding the sampled values
-    size: None, int or tuple (optional)
-        size of the sample set requested.
-
-    Returns
-    -------
-    List of broadcasted sample arrays
-
-    Examples
-    --------
-    .. code-block:: python
-        size = 100
-        sample0 = np.random.randn(size)
-        sample1 = np.random.randn(size, 5)
-        sample2 = np.random.randn(size, 4, 5)
-        out = broadcast_distribution_samples([sample0, sample1, sample2],
-                                             size=size)
-        assert all((o.shape == (size, 4, 5) for o in out))
-        assert np.all(sample0[:, None, None] == out[0])
-        assert np.all(sample1[:, None, :] == out[1])
-        assert np.all(sample2 == out[2])
-
-    .. code-block:: python
-        size = 100
-        sample0 = np.random.randn(size)
-        sample1 = np.random.randn(5)
-        sample2 = np.random.randn(4, 5)
-        out = broadcast_distribution_samples([sample0, sample1, sample2],
-                                             size=size)
-        assert all((o.shape == (size, 4, 5) for o in out))
-        assert np.all(sample0[:, None, None] == out[0])
-        assert np.all(sample1 == out[1])
-        assert np.all(sample2 == out[2])
-    """
-    if size is None:
-        return np.broadcast_arrays(*samples)
-    _size = to_tuple(size)
-    # Raw samples shapes
-    p_shapes = [p.shape for p in samples]
-    # samples shapes without the size prepend
-    sp_shapes = [s[len(_size):] if _size == s[:len(_size)] else s
-                 for s in p_shapes]
-    broadcast_shape = np.broadcast(*[np.empty(s) for s in sp_shapes]).shape
-    broadcasted_samples = []
-    for param, p_shape, sp_shape in zip(samples, p_shapes, sp_shapes):
-        if _size == p_shape[:len(_size)]:
-            # If size prepends the shape, then we have to add broadcasting axis
-            # in the middle
-            slicer_head = [slice(None)] * len(_size)
-            slicer_tail = ([np.newaxis] * (len(broadcast_shape) -
-                                           len(sp_shape)) +
-                           [slice(None)] * len(sp_shape))
-        else:
-            # If size does not prepend the shape, then we have leave the
-            # parameter as is
-            slicer_head = []
-            slicer_tail = [slice(None)] * len(sp_shape)
-        broadcasted_samples.append(param[tuple(slicer_head + slicer_tail)])
-    return np.broadcast_arrays(*broadcasted_samples)
+    return np.asarray(samples)
\ No newline at end of file

diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py
index ee18fed8b76..4bae5aeb821 100644
--- a/pymc3/distributions/mixture.py
+++ b/pymc3/distributions/mixture.py
@@ -8,8 +8,8 @@
 from .dist_math import bound, random_choice, to_tuple
 from .distribution import (Discrete, Distribution, draw_values,
                            generate_samples, _DrawValuesContext,
-                           _DrawValuesContextBlocker,
-                           broadcast_distribution_samples)
+                           _DrawValuesContextBlocker)
+from .shape_utils import broadcast_distribution_samples
 from .continuous import get_tau_sigma, Normal
 from ..theanof import _conversion_map
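
# How the rewritten generate_samples picks the size it hands the generator,
# sketched with the new helper (the shapes here are illustrative):
import numpy as np
from pymc3.distributions.shape_utils import broadcast_dist_samples_shape

size_tup, dist_shape = (10,), (5,)
param_shapes = [(5,), (1,)]
bcast = broadcast_dist_samples_shape(param_shapes + [dist_shape], size=size_tup)
assert bcast == (5,)
# bcast carries no size prepend, so the generator is called with
# size=size_tup + bcast, i.e. (10, 5); if bcast already started with
# size_tup, the size prefix would not be prepended a second time.
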
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index d773036d77e..cefea4a7e1c 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -21,7 +21,7 @@
 from ..model import Deterministic
 from .continuous import ChiSquared, Normal
 from .special import gammaln, multigammaln
-from .dist_math import bound, logpow, factln
+from .dist_math import bound, logpow, factln, to_tuple
 from ..math import kron_dot, kron_diag, kron_solve_lower, kronecker
@@ -520,11 +520,6 @@ def __init__(self, n, p, *args, **kwargs):
         n = np.squeeze(n) # works also if n is a tensor

         if len(self.shape) > 1:
-            m = self.shape[-2]
-            # try:
-            #     assert n.shape == (m,)
-            # except (AttributeError, AssertionError):
-            #     n = n * tt.ones(m)
             self.n = tt.shape_padright(n)
             self.p = p if p.ndim > 1 else tt.shape_padleft(p)
         elif n.ndim == 1:
@@ -543,104 +538,49 @@
                                  diff[inc_bool_arr.nonzero()])
         self.mode = mode

-    def _random(self, n, p, size=None):
+    def _random(self, n, p, size=None, raw_size=None):
         original_dtype = p.dtype
         # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
         p = p.astype('float64')
         # Now, re-normalize all of the values in float64 precision. This is done inside the conditionals
+        p /= np.sum(p, axis=-1, keepdims=True)
-        # np.random.multinomial needs `n` to be a scalar int and `p` a
-        # sequence
-        if p.ndim == 1 and (n.ndim == 0 or (n.ndim == 1 and n.shape[0] == 1)):
-            # If `n` is already a scalar and `p` is a sequence, then just
-            # return np.multinomial with some size handling
-            p = p / p.sum()
-            if size is not None:
-                if size == p.shape:
-                    size = None
-                elif size[-len(p.shape):] == p.shape:
-                    size = size[:len(size) - len(p.shape)]
-            randnum = np.random.multinomial(n, p, size=size)
-            return randnum.astype(original_dtype)
-        # The shapes of `p` and `n` must be broadcasted by hand depending on
-        # their ndim. We will assume that the last axis of the `p` array will
-        # be the sequence to feed into np.random.multinomial. The other axis
-        # will only have to be iterated over.
-        if n.ndim == p.ndim:
-            # p and n have the same ndim, so n.shape[-1] must be 1
-            if n.shape[-1] != 1:
-                raise ValueError('If n and p have the same number of '
-                                 'dimensions, the last axis of n must be '
-                                 'have len 1. Got {} instead.\n'
-                                 'n.shape = {}\n'
-                                 'p.shape = {}.'.format(n.shape[-1],
-                                                        n.shape,
-                                                        p.shape))
-            n_p_shape = np.broadcast(np.empty(p.shape[:-1]),
-                                     np.empty(n.shape[:-1])).shape
-            p = np.broadcast_to(p, n_p_shape + (p.shape[-1],))
-            n = np.broadcast_to(n, n_p_shape + (1,))
-        elif n.ndim == p.ndim - 1:
-            # n has the number of dimensions of p for the iteration, these must
-            # broadcast together
-            n_p_shape = np.broadcast(np.empty(p.shape[:-1]),
-                                     n).shape
-            p = np.broadcast_to(p, n_p_shape + (p.shape[-1],))
-            n = np.broadcast_to(n, n_p_shape + (1,))
-        elif p.ndim == 1:
-            # p only has the sequence array. We extend it with the dimensions
-            # of n
-            n_p_shape = n.shape
-            p = np.broadcast_to(p, n_p_shape + (p.shape[-1],))
-            n = np.broadcast_to(n, n_p_shape + (1,))
-        elif n.ndim == 0 or (n.dim == 1 and n.shape[0] == 1):
-            # n is a scalar. We extend it with the dimensions of p
-            n_p_shape = p.shape[:-1]
-            n = np.broadcast_to(n, n_p_shape + (1,))
-        else:
-            # There is no clear rule to broadcast p and n so we raise an error
-            raise ValueError('Incompatible shapes of n and p.\n'
-                             'n.shape = {}\n'
-                             'p.shape = {}'.format(n.shape, p.shape))
-
-        # Check what happens with size
-        if size is not None:
-            if size == p.shape:
-                size = None
-                _size = 1
-            elif size[-len(p.shape):] == p.shape:
-                size = size[:len(size) - len(p.shape)]
-                _size = np.prod(size)
-            else:
-                _size = np.prod(size)
-        else:
-            _size = 1
+        # Thanks to the default shape handling done in generate_samples, the
+        # last axis of n is a dummy axis that allows it to broadcast well
+        # with p
+        n = np.broadcast_to(n, size)
+        p = np.broadcast_to(p, size)
+        n = n[..., 0]

-        # We now flatten p and n up to the last dimension
-        p_shape = p.shape
-        p = np.reshape(p, (np.prod(n_p_shape), -1))
-        n = np.reshape(n, (np.prod(n_p_shape), -1))
-        # We renormalize p
-        p = p / p.sum(axis=1, keepdims=True)
-        # We iterate calls to np.random.multinomial
-        randnum = np.asarray([
-            np.random.multinomial(nn, pp, size=_size)
-            for (nn, pp) in zip(n, p)
-        ])
-        # We swap the iteration axis with the _size axis
-        randnum = np.moveaxis(randnum, 1, 0)
-        # We reshape the random numbers to the corresponding size + p_shape
-        if size is None:
-            randnum = np.reshape(randnum, p_shape)
+        # np.random.multinomial needs `n` to be a scalar int and `p` a
+        # sequence, so we semi-flatten them and iterate over them
+        size_ = to_tuple(raw_size)
+        if p.ndim > len(size_) and p.shape[:len(size_)] == size_:
+            # p and n have the size_ prepend so we don't need it in np.random
+            n_ = n.reshape([-1])
+            p_ = p.reshape([-1, p.shape[-1]])
+            samples = np.array([
+                np.random.multinomial(nn, pp)
+                for nn, pp in zip(n_, p_)
+            ])
+            samples = samples.reshape(p.shape)
         else:
-            randnum = np.reshape(randnum, size + p_shape)
+            # p and n don't have the size prepend
+            n_ = n.reshape([-1])
+            p_ = p.reshape([-1, p.shape[-1]])
+            samples = np.array([
+                np.random.multinomial(nn, pp, size=size_)
+                for nn, pp in zip(n_, p_)
+            ])
+            samples = np.moveaxis(samples, 0, -1)
+            samples = samples.reshape(size + p.shape)
         # We cast back to the original dtype
-        return randnum.astype(original_dtype)
+        return samples.astype(original_dtype)

     def random(self, point=None, size=None):
         n, p = draw_values([self.n, self.p], point=point, size=size)
         samples = generate_samples(self._random, n, p,
                                    dist_shape=self.shape,
+                                   not_broadcast_kwargs={'raw_size': size},
                                    size=size)
         return samples
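
# Net effect of the Multinomial changes, as exercised in the tests below
# (a sketch; the batched shape and parameter values are arbitrary):
import numpy as np
import pymc3 as pm

with pm.Model():
    m = pm.Multinomial('m', n=100, p=np.ones(6) / 6, shape=(5, 6))
draws = m.random(size=10)
assert draws.shape == (10, 5, 6)   # size is prepended to the full shape
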
+""" +import numpy as np +from .dist_math import to_tuple + + +__all__ = [ + "shapes_broadcasting", + "broadcast_dist_samples_shape", + "get_broadcastable_dist_samples", + "broadcast_distribution_samples", + "broadcast_dist_samples_to", +] + + +def _check_shape_type(shape): + out = [] + try: + shape = np.atleast_1d(shape) + for s in shape: + if isinstance(s, np.ndarray) and s.ndim > 0: + raise TypeError("Value {} is not a valid integer".format(s)) + o = int(s) + if o != s: + raise TypeError("Value {} is not a valid integer".format(s)) + out.append(o) + except Exception: + raise TypeError( + "Supplied value {} does not represent a valid shape".format(shape) + ) + return tuple(out) + + +def shapes_broadcasting(*args, raise_exception=False): + """Return the shape resulting from broadcasting multiple shapes. + Represents numpy's broadcasting rules. + + Parameters + ---------- + *args : array-like of int + Tuples or arrays or lists representing the shapes of arrays to be + broadcast. + raise_exception: bool (optional) + Controls whether to raise an exception or simply return `None` if + the broadcasting fails. + + Returns + ------- + Resulting shape. If broadcasting is not possible and `raise_exception` is + False, then `None` is returned. If `raise_exception` is `True`, a + `ValueError` is raised. + """ + x = list(_check_shape_type(args[0])) if args else () + for arg in args[1:]: + y = list(_check_shape_type(arg)) + if len(x) < len(y): + x, y = y, x + if len(y) > 0: + x[-len(y) :] = [ + j if i == 1 else i if j == 1 else i if i == j else 0 + for i, j in zip(x[-len(y) :], y) + ] + if not all(x): + if raise_exception: + raise ValueError( + "Supplied shapes {} do not broadcast together".format( + ", ".join(["{}".format(a) for a in args]) + ) + ) + else: + return None + return tuple(x) + + +def broadcast_dist_samples_shape(shapes, size=None): + """Apply shape broadcasting to shape tuples but assuming that the shapes + correspond to draws from random variables, with the `size` tuple possibly + prepended to it. The `size` prepend is ignored to consider if the supplied + `shapes` can broadcast or not. It is prepended to the resulting broadcasted + `shapes`, if any of the shape tuples had the `size` prepend. + + Parameters + ---------- + shapes: Iterable of tuples holding the distribution samples shapes + size: None, int or tuple (optional) + size of the sample set requested. + + Returns + ------- + tuple of the resulting shape + + Examples + -------- + .. code-block:: python + size = 100 + shape0 = (size,) + shape1 = (size, 5) + shape2 = (size, 4, 5) + out = broadcast_dist_samples_shape([shape0, shape1, shape2], + size=size) + assert out == (size, 4, 5) + + .. code-block:: python + size = 100 + shape0 = (size,) + shape1 = (5,) + shape2 = (4, 5) + out = broadcast_dist_samples_shape([shape0, shape1, shape2], + size=size) + assert out == (size, 4, 5) + + .. 
code-block:: python + size = 100 + shape0 = (1,) + shape1 = (5,) + shape2 = (4, 5) + out = broadcast_dist_samples_shape([shape0, shape1, shape2], + size=size) + assert out == (4, 5) + """ + if size is None: + broadcasted_shape = shapes_broadcasting(*shapes) + if broadcasted_shape is None: + raise ValueError( + "Cannot broadcast provided shapes {} given size: {}".format( + ", ".join(["{}".format(s) for s in shapes]), size + ) + ) + return broadcasted_shape + shapes = [_check_shape_type(s) for s in shapes] + _size = to_tuple(size) + # samples shapes without the size prepend + sp_shapes = [ + s[len(_size) :] if _size == s[: min([len(_size), len(s)])] else s + for s in shapes + ] + try: + broadcast_shape = shapes_broadcasting(*sp_shapes, raise_exception=True) + except ValueError: + raise ValueError( + "Cannot broadcast provided shapes {} given size: {}".format( + ", ".join(["{}".format(s) for s in shapes]), size + ) + ) + broadcastable_shapes = [] + for shape, sp_shape in zip(shapes, sp_shapes): + if _size == shape[: len(_size)]: + # If size prepends the shape, then we have to add broadcasting axis + # in the middle + p_shape = ( + shape[: len(_size)] + + (1,) * (len(broadcast_shape) - len(sp_shape)) + + shape[len(_size) :] + ) + else: + p_shape = shape + broadcastable_shapes.append(p_shape) + return shapes_broadcasting(*broadcastable_shapes, raise_exception=True) + + +def get_broadcastable_dist_samples( + samples, size=None, must_bcast_with=None, return_out_shape=False +): + """Get a view of the samples drawn from distributions which adds new axises + in between the `size` prepend and the distribution's `shape`. These views + should be able to broadcast the samples from the distrubtions taking into + account the `size` (i.e. the number of samples) of the draw, which is + prepended to the sample's `shape`. Optionally, one can supply an extra + `must_bcast_with` to try to force samples to be able to broadcast with a + given shape. A `ValueError` is raised if it is not possible to broadcast + the provided samples. + + Parameters + ---------- + samples: Iterable of ndarrays holding the sampled values + size: None, int or tuple (optional) + size of the sample set requested. + must_bcast_with: None, int or tuple (optional) + Tuple shape to which the samples must be able to broadcast + return_out_shape: bool (optional) + If `True`, this function also returns the output's shape and not only + samples views. + + Returns + ------- + broadcastable_samples: List of the broadcasted sample arrays + broadcast_shape: If `return_out_shape` is `True`, the resulting broadcast + shape is returned. + + Examples + -------- + .. code-block:: python + must_bcast_with = (3, 1, 5) + size = 100 + sample0 = np.random.randn(size) + sample1 = np.random.randn(size, 5) + sample2 = np.random.randn(size, 4, 5) + out = broadcast_dist_samples_to( + [sample0, sample1, sample2], + size=size, + must_bcast_with=must_bcast_with, + ) + assert out[0].shape == (size, 1, 1, 1) + assert out[1].shape == (size, 1, 1, 5) + assert out[2].shape == (size, 1, 4, 5) + assert np.all(sample0[:, None, None, None] == out[0]) + assert np.all(sample1[:, None, None] == out[1]) + assert np.all(sample2[:, None] == out[2]) + + .. 
code-block:: python + size = 100 + must_bcast_with = (3, 1, 5) + sample0 = np.random.randn(size) + sample1 = np.random.randn(5) + sample2 = np.random.randn(4, 5) + out = broadcast_dist_samples_to( + [sample0, sample1, sample2], + size=size, + must_bcast_with=must_bcast_with, + ) + assert out[0].shape == (size, 1, 1, 1) + assert out[1].shape == (5,) + assert out[2].shape == (4, 5) + assert np.all(sample0[:, None, None, None] == out[0]) + assert np.all(sample1 == out[1]) + assert np.all(sample2 == out[2]) + """ + samples = [np.asarray(p) for p in samples] + _size = to_tuple(size) + must_bcast_with = to_tuple(must_bcast_with) + # Raw samples shapes + p_shapes = [p.shape for p in samples] + [_check_shape_type(must_bcast_with)] + out_shape = broadcast_dist_samples_shape(p_shapes, size=size) + # samples shapes without the size prepend + sp_shapes = [ + s[len(_size) :] if _size == s[: min([len(_size), len(s)])] else s + for s in p_shapes + ] + broadcast_shape = shapes_broadcasting(*sp_shapes, raise_exception=True) + broadcastable_samples = [] + for param, p_shape, sp_shape in zip(samples, p_shapes, sp_shapes): + if _size == p_shape[: min([len(_size), len(p_shape)])]: + # If size prepends the shape, then we have to add broadcasting axis + # in the middle + slicer_head = [slice(None)] * len(_size) + slicer_tail = [np.newaxis] * (len(broadcast_shape) - len(sp_shape)) + [ + slice(None) + ] * len(sp_shape) + else: + # If size does not prepend the shape, then we have leave the + # parameter as is + slicer_head = [] + slicer_tail = [slice(None)] * len(sp_shape) + broadcastable_samples.append(param[tuple(slicer_head + slicer_tail)]) + if return_out_shape: + return broadcastable_samples, out_shape + else: + return broadcastable_samples + + +def broadcast_distribution_samples(samples, size=None): + """Broadcast samples drawn from distributions taking into account the + size (i.e. the number of samples) of the draw, which is prepended to + the sample's shape. + + Parameters + ---------- + samples: Iterable of ndarrays holding the sampled values + size: None, int or tuple (optional) + size of the sample set requested. + + Returns + ------- + List of broadcasted sample arrays + + Examples + -------- + .. code-block:: python + size = 100 + sample0 = np.random.randn(size) + sample1 = np.random.randn(size, 5) + sample2 = np.random.randn(size, 4, 5) + out = broadcast_distribution_samples([sample0, sample1, sample2], + size=size) + assert all((o.shape == (size, 4, 5) for o in out)) + assert np.all(sample0[:, None, None] == out[0]) + assert np.all(sample1[:, None, :] == out[1]) + assert np.all(sample2 == out[2]) + + .. code-block:: python + size = 100 + sample0 = np.random.randn(size) + sample1 = np.random.randn(5) + sample2 = np.random.randn(4, 5) + out = broadcast_distribution_samples([sample0, sample1, sample2], + size=size) + assert all((o.shape == (size, 4, 5) for o in out)) + assert np.all(sample0[:, None, None] == out[0]) + assert np.all(sample1 == out[1]) + assert np.all(sample2 == out[2]) + """ + return np.broadcast_arrays(*get_broadcastable_dist_samples(samples, size=size)) + + +def broadcast_dist_samples_to(to_shape, samples, size=None): + """Broadcast samples drawn from distributions to a given shape, taking into + account the size (i.e. the number of samples) of the draw, which is + prepended to the sample's shape. 
+ + Parameters + ---------- + to_shape: Tuple shape onto which the samples must be able to broadcast + samples: Iterable of ndarrays holding the sampled values + size: None, int or tuple (optional) + size of the sample set requested. + + Returns + ------- + List of the broadcasted sample arrays + + Examples + -------- + .. code-block:: python + to_shape = (3, 1, 5) + size = 100 + sample0 = np.random.randn(size) + sample1 = np.random.randn(size, 5) + sample2 = np.random.randn(size, 4, 5) + out = broadcast_dist_samples_to( + to_shape, + [sample0, sample1, sample2], + size=size + ) + assert np.all((o.shape == (size, 3, 4, 5) for o in out)) + assert np.all(sample0[:, None, None, None] == out[0]) + assert np.all(sample1[:, None, None] == out[1]) + assert np.all(sample2[:, None] == out[2]) + + .. code-block:: python + size = 100 + to_shape = (3, 1, 5) + sample0 = np.random.randn(size) + sample1 = np.random.randn(5) + sample2 = np.random.randn(4, 5) + out = broadcast_dist_samples_to( + to_shape, + [sample0, sample1, sample2], + size=size + ) + assert np.all((o.shape == (size, 3, 4, 5) for o in out)) + assert np.all(sample0[:, None, None, None] == out[0]) + assert np.all(sample1 == out[1]) + assert np.all(sample2 == out[2]) + """ + samples, to_shape = get_broadcastable_dist_samples( + samples, size=size, must_bcast_with=to_shape, return_out_shape=True + ) + return [np.broadcast_to(o, to_shape) for o in samples] diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index d47a8c8abd4..f55b3247f67 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -486,13 +486,14 @@ def test_multivariate2(self): probs = pm.Dirichlet("probs", a=np.ones(6), shape=6) obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data) burned_trace = pm.sample(20, tune=10, cores=1) - sim_priors = pm.sample_prior_predictive(samples=20, model=dm_model) - sim_ppc = pm.sample_posterior_predictive( - burned_trace, samples=20, model=dm_model - ) - assert sim_priors["probs"].shape == (20, 6) - assert sim_priors["obs"].shape == (20, 6) - assert sim_ppc["obs"].shape == (20,) + obs.distribution.shape + sim_priors = pm.sample_prior_predictive(samples=20, + model=dm_model) + sim_ppc = pm.sample_posterior_predictive(burned_trace, + samples=20, + model=dm_model) + assert sim_priors['probs'].shape == (20, 6) + assert sim_priors['obs'].shape == (20,) + obs.distribution.shape + assert sim_ppc['obs'].shape == (20,) + obs.distribution.shape def test_layers(self): with pm.Model() as model: diff --git a/pymc3/tests/test_shape_handling.py b/pymc3/tests/test_shape_handling.py new file mode 100644 index 00000000000..5af1c8a27e0 --- /dev/null +++ b/pymc3/tests/test_shape_handling.py @@ -0,0 +1,223 @@ +import pytest +import numpy as np +import pymc3 as pm +from pymc3.distributions.shape_utils import ( + shapes_broadcasting, + broadcast_dist_samples_shape, + get_broadcastable_dist_samples, + broadcast_distribution_samples, + broadcast_dist_samples_to, +) +from pymc3.distributions.dist_math import to_tuple + +test_shapes = [ + (tuple(), (1,), (4,), (5, 4)), + (tuple(), (1,), (7,), (5, 4)), + (tuple(), (1,), (1, 4), (5, 4)), + (tuple(), (1,), (5, 1), (5, 4)), + (tuple(), (1,), (3, 4), (5, 4)), + (tuple(), (1,), (5, 3), (5, 4)), + (tuple(), (1,), (10, 4), (5, 4)), + (tuple(), (1,), (10,), (5, 4)), + (tuple(), (1,), (1, 1, 4), (5, 4)), + (tuple(), (1,), (10, 1, 4), (5, 4)), + (tuple(), (1,), (10, 5, 4), (5, 4)), +] +test_sizes = [ + None, + tuple(), + 1, + (1,), + 10, + (10,), + (1, 1), + (10, 1), + (1, 10), + 
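
# The corrected expectations above in one line: predictive sampling now
# prepends the number of requested samples to the distribution's full
# shape, for priors and posteriors alike. A sketch mirroring
# test_multivariate2 (the data values are placeholders):
import numpy as np
import pymc3 as pm

mn_data = np.random.multinomial(100, np.ones(6) / 6, size=10)
with pm.Model() as dm_model:
    probs = pm.Dirichlet('probs', a=np.ones(6), shape=6)
    obs = pm.Multinomial('obs', n=100, p=probs, observed=mn_data)
sim_priors = pm.sample_prior_predictive(samples=20, model=dm_model)
assert sim_priors['obs'].shape == (20,) + tuple(obs.distribution.shape)
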
diff --git a/pymc3/tests/test_shape_handling.py b/pymc3/tests/test_shape_handling.py
new file mode 100644
index 00000000000..5af1c8a27e0
--- /dev/null
+++ b/pymc3/tests/test_shape_handling.py
@@ -0,0 +1,223 @@
+import pytest
+import numpy as np
+import pymc3 as pm
+from pymc3.distributions.shape_utils import (
+    shapes_broadcasting,
+    broadcast_dist_samples_shape,
+    get_broadcastable_dist_samples,
+    broadcast_distribution_samples,
+    broadcast_dist_samples_to,
+)
+from pymc3.distributions.dist_math import to_tuple
+
+test_shapes = [
+    (tuple(), (1,), (4,), (5, 4)),
+    (tuple(), (1,), (7,), (5, 4)),
+    (tuple(), (1,), (1, 4), (5, 4)),
+    (tuple(), (1,), (5, 1), (5, 4)),
+    (tuple(), (1,), (3, 4), (5, 4)),
+    (tuple(), (1,), (5, 3), (5, 4)),
+    (tuple(), (1,), (10, 4), (5, 4)),
+    (tuple(), (1,), (10,), (5, 4)),
+    (tuple(), (1,), (1, 1, 4), (5, 4)),
+    (tuple(), (1,), (10, 1, 4), (5, 4)),
+    (tuple(), (1,), (10, 5, 4), (5, 4)),
+]
+test_sizes = [
+    None,
+    tuple(),
+    1,
+    (1,),
+    10,
+    (10,),
+    (1, 1),
+    (10, 1),
+    (1, 10),
+    (5,),
+    (5, 4),
+    (1, 1, 1, 1),
+]
+test_to_shapes = [None, tuple(), (10, 5, 4), (10, 1, 1, 5, 1)]
+
+
+@pytest.fixture(scope="module", params=test_sizes, ids=str)
+def fixture_sizes(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=test_shapes, ids=str)
+def fixture_shapes(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[False, True], ids=str)
+def fixture_exception_handling(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def samples_to_broadcast(fixture_sizes, fixture_shapes):
+    samples = [np.empty(s) for s in fixture_shapes]
+    try:
+        broadcast_shape = broadcast_dist_samples_shape(
+            fixture_shapes, size=fixture_sizes
+        )
+    except ValueError:
+        broadcast_shape = None
+    return fixture_sizes, samples, broadcast_shape
+
+
+@pytest.fixture(scope="module", params=test_to_shapes, ids=str)
+def samples_to_broadcast_to(request, samples_to_broadcast):
+    to_shape = request.param
+    size, samples, broadcast_shape = samples_to_broadcast
+    if broadcast_shape is not None:
+        try:
+            broadcast_shape = broadcast_dist_samples_shape(
+                [broadcast_shape, to_tuple(to_shape)], size=size
+            )
+        except ValueError:
+            broadcast_shape = None
+    return to_shape, size, samples, broadcast_shape
+
+
+@pytest.fixture(scope="module")
+def fixture_model():
+    with pm.Model() as model:
+        n = 5
+        dim = 4
+        with pm.Model():
+            cov = pm.InverseGamma("cov", alpha=1, beta=1)
+            x = pm.Normal(
+                "x", mu=np.ones((dim,)), sigma=pm.math.sqrt(cov), shape=(n, dim)
+            )
+            eps = pm.HalfNormal("eps", np.ones((n, 1)), shape=(n, dim))
+    return model, cov, x, eps
+
+
+# TODO: once #3422 is solved this fixture should be replaced by fixture_sizes
+@pytest.fixture(
+    scope="module",
+    params=[
+        s if len(to_tuple(s)) <= 1 else pytest.param(s, marks=pytest.mark.xfail)
+        for s in test_sizes
+    ],
+    ids=str,
+)
+def fixture_samples(request):
+    return request.param
+
+
+class TestShapesBroadcasting:
+    @pytest.mark.parametrize(
+        "bad_input",
+        [None, [None], "asd", 3.6, {1: 2}, {3}, [8, [8]], "3", ["3"], np.array([[2]])],
+        ids=str,
+    )
+    def test_type_check_raises(self, bad_input):
+        with pytest.raises(TypeError):
+            shapes_broadcasting(bad_input, tuple(), raise_exception=True)
+        with pytest.raises(TypeError):
+            shapes_broadcasting(bad_input, tuple(), raise_exception=False)
+
+    def test_type_check_success(self):
+        inputs = [3, 3.0, tuple(), [3], (3,), np.array(3), np.array([3])]
+        out = shapes_broadcasting(*inputs)
+        assert out == (3,)
+
+    def test_broadcasting(self, fixture_shapes, fixture_exception_handling):
+        shapes = fixture_shapes
+        raise_exception = fixture_exception_handling
+        try:
+            expected_out = np.broadcast(*[np.empty(s) for s in shapes]).shape
+        except ValueError:
+            expected_out = None
+        if expected_out is None:
+            if raise_exception:
+                with pytest.raises(ValueError):
+                    shapes_broadcasting(*shapes, raise_exception=raise_exception)
+            else:
+                out = shapes_broadcasting(*shapes, raise_exception=raise_exception)
+                assert out is None
+        else:
+            out = shapes_broadcasting(*shapes, raise_exception=raise_exception)
+            assert out == expected_out
+
+    def test_broadcast_dist_samples_shape(self, fixture_sizes, fixture_shapes):
+        size = fixture_sizes
+        shapes = fixture_shapes
+        size_ = to_tuple(size)
+        shapes_ = [
+            s if s[: min([len(size_), len(s)])] != size_ else s[len(size_) :]
+            for s in shapes
+        ]
+        try:
+            expected_out = np.broadcast(*[np.empty(s) for s in shapes_]).shape
+        except ValueError:
+            expected_out = None
+        if expected_out is not None and any(
+            (s[: min([len(size_), len(s)])] == size_ for s in shapes)
+        ):
+            expected_out = size_ + expected_out
+        if expected_out is None:
+            with pytest.raises(ValueError):
+                broadcast_dist_samples_shape(shapes, size=size)
+        else:
+            out = broadcast_dist_samples_shape(shapes, size=size)
+            assert out == expected_out
+
+
+class TestSamplesBroadcasting:
+    def test_broadcast_distribution_samples(self, samples_to_broadcast):
+        size, samples, broadcast_shape = samples_to_broadcast
+        if broadcast_shape is not None:
+            outs = broadcast_distribution_samples(samples, size=size)
+            assert all((o.shape == broadcast_shape for o in outs))
+        else:
+            with pytest.raises(ValueError):
+                broadcast_distribution_samples(samples, size=size)
+
+    def test_get_broadcastable_dist_samples(self, samples_to_broadcast):
+        size, samples, broadcast_shape = samples_to_broadcast
+        if broadcast_shape is not None:
+            size_ = to_tuple(size)
+            outs, out_shape = get_broadcastable_dist_samples(
+                samples, size=size, return_out_shape=True
+            )
+            assert out_shape == broadcast_shape
+            for i, o in zip(samples, outs):
+                ishape = i.shape
+                if ishape[: min([len(size_), len(ishape)])] == size_:
+                    expected_shape = (
+                        size_
+                        + (1,) * (len(broadcast_shape) - len(ishape))
+                        + ishape[len(size_) :]
+                    )
+                else:
+                    expected_shape = ishape
+                assert o.shape == expected_shape
+            assert shapes_broadcasting(*[o.shape for o in outs]) == broadcast_shape
+        else:
+            with pytest.raises(ValueError):
+                get_broadcastable_dist_samples(samples, size=size)
+
+    def test_broadcast_dist_samples_to(self, samples_to_broadcast_to):
+        to_shape, size, samples, broadcast_shape = samples_to_broadcast_to
+        if broadcast_shape is not None:
+            outs = broadcast_dist_samples_to(to_shape, samples, size=size)
+            assert all((o.shape == broadcast_shape for o in outs))
+        else:
+            with pytest.raises(ValueError):
+                broadcast_dist_samples_to(to_shape, samples, size=size)
+
+
+def test_sample_generate_values(fixture_model, fixture_samples):
+    model, cov, x, eps = fixture_model
+    size = to_tuple(fixture_samples)
+    if size == (1,):
+        # Single draws are interpreted as scalars for backwards compatibility
+        size = tuple()
+    with model:
+        prior = pm.sample_prior_predictive(samples=fixture_samples)
+    for rv in [cov, x, eps]:
+        assert prior[rv.name].shape == size + tuple(rv.distribution.shape)
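
# End-to-end sketch of the public helpers the new tests exercise (shapes
# follow the shape_utils docstrings; the values are illustrative):
import numpy as np
from pymc3.distributions.shape_utils import (
    broadcast_distribution_samples,
    broadcast_dist_samples_to,
)

size = 100
sample0 = np.random.randn(size)    # carries the size prepend
sample1 = np.random.randn(5)       # parameter-only shapes
sample2 = np.random.randn(4, 5)

out = broadcast_distribution_samples([sample0, sample1, sample2], size=size)
assert all(o.shape == (size, 4, 5) for o in out)

out = broadcast_dist_samples_to((3, 1, 5), [sample0, sample1, sample2], size=size)
assert all(o.shape == (size, 3, 4, 5) for o in out)
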