In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
import pandas as pd
import seaborn as sns
import tensorflow as tf
#tfd = tf.contrib.distributions
import tensorflow_probability as tfp
# Short aliases for the TensorFlow Probability submodules.
tfd = tfp.distributions
tfb = tfp.bijectors
# NOTE(review): tf.contrib exists only in TensorFlow 1.x; `layers` is not
# referenced in any of the visible cells.
layers = tf.contrib.layers
# Fix TensorFlow's graph-level seed (TF 1.x API). NumPy's global generator,
# which the accept/reject step below draws from, is not seeded in the visible cells.
tf.set_random_seed(0)
from hepmc.core.densities.qcd import ee_qq_ng, export_hepmc, import_hepmc
from hepmc.core.densities.nice import Nice
from hepmc.core.phase_space.rambo import RamboOnDiet
from hepmc.core.phase_space.mapping import MappedDensity
from hepmc.core.sampling import Sample
from hepmc.core.integration.importance import ImportanceMC
from hepmc.core.sampling import Sample, AcceptRejectSampler
#from hepmc.core.markov.metropolis import DefaultMetropolis
#from hepmc.core.proposals import Gaussian

In [None]:
# Target density and phase-space mapping from hepmc.
# NOTE(review): the numeric arguments are presumably multiplicity, centre-of-mass
# energy and cut parameters -- confirm against the hepmc signatures.
eeqq3g = ee_qq_ng(3, 100., 5., .3)
rambo_mapping = RamboOnDiet(100., 5)
# The density combined with the mapping; used below as the importance-sampling target.
mapped = MappedDensity(eeqq3g, rambo_mapping)
# Load the training events, then re-wrap them with their data passed through the
# inverse mapping while keeping the original weights.
training_sample = import_hepmc('../samples/qcd/2-5/training.hepmc')
training_sample = Sample(data=rambo_mapping.map_inverse(training_sample.data), weights=training_sample.weights)

In [None]:
# Pair plot of the training sample: 1-D histograms on the diagonal and 2-D
# histograms off the diagonal, 15 bins each.
# Note: the sample weights are not passed to plt.hist / plt.hist2d here.
data = pd.DataFrame(training_sample.data)
f = sns.PairGrid(data)
f = f.map_diag(plt.hist, bins=15)
f = f.map_offdiag(plt.hist2d, bins=15)

In [None]:
# Load the weighted Sherpa events and apply the same inverse mapping as for the
# training sample, keeping the original weights.
sherpa_weighted_sample = import_hepmc('../samples/qcd/2-5/sherpa_weighted.hepmc')
sherpa_weighted_sample = Sample(data=rambo_mapping.map_inverse(sherpa_weighted_sample.data), weights=sherpa_weighted_sample.weights)

In [None]:
# Pair plot of the Sherpa sample (rebinds `data` and `f` from the previous plot cell).
# NOTE(review): this sample carries weights, but they are not passed to the
# histogram calls, so the plot shows the raw point distribution.
data = pd.DataFrame(sherpa_weighted_sample.data)
f = sns.PairGrid(data)
f = f.map_diag(plt.hist, bins=15)
f = f.map_offdiag(plt.hist2d, bins=15)

In [None]:
# Construct the Nice model from the training sample; %time reports how long this
# single statement takes. NOTE(review): presumably the constructor runs the
# training loop itself (train_iters is passed here) -- confirm in hepmc.
%time nice = Nice(training_sample, train_iters=10000, num_bijectors=4)

In [None]:
# Draw 100000 points from the fitted model. `sample` is rebound by a later cell.
sample = nice.rvs(100000)

In [None]:
# Pair plot of the points drawn from the model. The 2-D histograms are restricted
# to [0, 1] x [0, 1]; the 1-D histograms use the automatic range.
data = pd.DataFrame(sample)
f = sns.PairGrid(data)
f = f.map_diag(plt.hist, bins=15)
f = f.map_offdiag(plt.hist2d, bins=15, range=[[0, 1], [0, 1]])

In [None]:
# Importance sampler with the mapped density as target and the Nice model as
# proposal distribution; draw (and time) a weighted sample of 10000 points.
importance_sampler = ImportanceMC(mapped, nice)
%time nice_sample = importance_sampler.sample(10000)

In [None]:
# Push the weighted sample through the forward mapping and write it to a HepMC file.
# NOTE(review): the last cell of this notebook writes to the same path.
mapped_sample = Sample(data=rambo_mapping.map(nice_sample.data), weights=nice_sample.weights)
export_hepmc(100., mapped_sample, "../samples/qcd/2-5/realnvp.hepmc")

In [None]:
# Use the largest weight seen in this finite sample as the accept/reject bound.
# Weights drawn later can exceed it, in which case they are not handled specially.
bound = nice_sample.weights.max()
print(bound)

In [None]:
# Mean weight divided by the bound, shown as the cell output. This is the expected
# acceptance fraction when accepting with probability weight / bound.
nice_sample.weights.mean()/bound

In [None]:
# Accept/reject sampler that draws its proposals from the importance sampler.
# NOTE(review): when the notebook runs top to bottom, AcceptRejectSampler here is
# the class imported from hepmc; the notebook's own definition comes in a later cell.
sampler = AcceptRejectSampler(target=mapped, bound=bound, sampler=importance_sampler, sampling_pdf=nice.pdf)

In [None]:
# Draw (and time) 100 accepted points. Note that this rebinds `sample`, which
# previously held the array returned by nice.rvs(100000).
%time sample = sampler.sample(100, expected_efficiency=.004)

In [None]:
class AcceptRejectSampler(object):
    """Acceptance-rejection method for sampling a given pdf.

    The method uses a known distribution and sampling method to propose
    samples which are then accepted with the probability
    pdf(x)/(c * sampling_pdf(x)), thus producing the desired distribution.
    The resulting sample is unweighted.

    In this implementation the acceptance probability is taken from the
    weights returned by the proposal sampler: a proposal is kept when a
    uniform random number is below ``weights / bound``.

    .. todo::
        Handle points that lie above the bound.
    """

    def __init__(self, target, bound: float,
            sampler = None, sampling_pdf = None) -> None:
        """
        Parameters
        ----------
        target
            Unnormalized desired probability distribution of the sample.
            Must expose an ``ndim`` attribute.
        bound
            Constant such that pdf(x) <= bound * sampling_pdf(x)
            for all x in the range of sampling.
        sampler
            The sampler which generates the proposals. It must provide
            ``sample(n)`` returning an object with ``data`` and ``weights``.
            The default is a uniform sampler.
        sampling_pdf
            Density of the proposal distribution. It is stored on the
            instance but not consulted by :meth:`sample`, which relies on
            the weights delivered by ``sampler`` instead.
        """
        self.target = target
        self.bound = bound
        self.ndim = target.ndim

        if sampler is None:
            # NOTE(review): UniformSampler is neither defined nor imported in
            # the visible notebook cells; this branch needs it in scope.
            sampler = UniformSampler(target)
            def sampling_pdf(x):
                # A density yields one value per point, so a 2-D batch of
                # shape (n, ndim) gets n ones rather than n * ndim.
                x = np.asarray(x)
                return np.ones(x.shape[0] if x.ndim > 1 else x.size)

        self.sampler = sampler
        self.sampling_pdf = sampling_pdf

    def sample(self, sample_size: int, expected_efficiency: float = 1.) -> "Sample":
        """Draw ``sample_size`` accepted points.

        Parameters
        ----------
        sample_size
            Number of accepted points to return.
        expected_efficiency
            Guess of the acceptance fraction; each round requests
            ``n_todo / expected_efficiency`` proposals (at least one).

        Returns
        -------
        Sample
            Sample whose ``data`` has shape ``(sample_size, ndim)``.

        Raises
        ------
        ValueError
            If ``expected_efficiency`` is not strictly positive.
        """
        if not expected_efficiency > 0:
            raise ValueError("expected_efficiency must be positive")

        x = np.empty((sample_size, self.ndim))

        n_todo = sample_size
        trials = 0
        while n_todo > 0:
            print('n_todo:', n_todo)
            # Never request zero proposals: with expected_efficiency > 1 the
            # truncated quotient can be 0, which would never make progress.
            n_proposals = max(1, int(n_todo / expected_efficiency))
            sample = self.sampler.sample(n_proposals)
            proposal = sample.data
            # Count and test what the sampler actually delivered.
            trials += len(proposal)
            u = np.random.rand(len(proposal))
            accept = u < sample.weights / self.bound
            accepted = proposal[accept]
            # Surplus accepted points beyond what is still needed are dropped.
            n_keep = min(len(accepted), n_todo)
            start = sample_size - n_todo
            x[start:start + n_keep] = accepted[:n_keep]
            n_todo -= n_keep

        # No proposals were drawn for an empty request, so there is no
        # efficiency to report.
        if trials > 0:
            print('Unweighting eff.:', sample_size/trials)
        return Sample(data=x, target=self.target)

In [None]:
class ImportanceMC(object):
    """Importance sampling: draw points from a proposal and weight them.

    Points are drawn from ``dist`` and weighted with
    ``target.pdf(x) / dist.pdf(x)``. Proposals for which the target density
    evaluates to exactly zero are discarded and redrawn, so every returned
    point has a non-zero target density.
    """

    def __init__(self, target, dist, name: str = "MC Importance") -> None:
        """
        Parameters
        ----------
        target
            Density to sample. Must expose ``ndim`` and ``pdf(x)``.
        dist
            Distribution to use for sampling. Must expose ``rvs(n)`` and
            ``pdf(x)``.
        name
            Name of the method that can be used as label in
            plotting routines (can be changed to name parameters).
        """
        self.method_name = name

        self.target = target
        self.ndim = target.ndim
        self.dist = dist

    def sample(self, eval_count) -> "Sample":
        """Generate a weighted sample of ``eval_count`` points.

        Parameters
        ----------
        eval_count
            Number of points in the returned sample. The number of target
            evaluations can be larger, because proposals with zero target
            density are redrawn.

        Returns
        -------
        Sample
            Sample carrying the points (``data``), the target density at
            those points (``pdf``) and the importance weights (``weights``).
        """
        xs = np.empty((eval_count, self.ndim))
        ys = np.empty(eval_count)

        if eval_count == 0:
            # Nothing to draw; also avoids dividing by zero trials below.
            return Sample(data=xs, target=self.target, pdf=ys,
                          weights=np.empty(0))

        trials = 0
        # Slots of xs/ys that still need a point with non-zero target density.
        indices = np.arange(eval_count)
        # NOTE(review): this loop does not terminate if the target density is
        # zero everywhere the proposal distribution puts its points.
        while indices.size > 0:
            print('Todo:', indices.size)
            trials += indices.size
            x = self.dist.rvs(indices.size)
            y = self.target.pdf(x)
            in_bounds = y != 0.
            xs[indices[in_bounds]] = x[in_bounds]
            ys[indices[in_bounds]] = y[in_bounds]
            indices = indices[np.logical_not(in_bounds)]

        print('Sampling efficiency:', eval_count/trials)
        weights = ys / self.dist.pdf(xs)
        return Sample(data=xs, target=self.target, pdf=ys, weights=weights)

In [None]:
# Open a TF 1.x interactive session (it installs itself as the default session).
# NOTE(review): nothing in the visible cells uses `sess` directly.
sess = tf.InteractiveSession()

In [None]:
# Precision and batch-norm settings.
# NOTE(review): none of these names is referenced in the visible cells.
DTYPE=tf.float32
NP_DTYPE=np.float32
USE_BATCHNORM = False

In [None]:
# Training hyperparameters.
# NOTE(review): not referenced in the visible cells; the Nice(...) call above
# passes literal values (train_iters=10000, num_bijectors=4) instead.
batch_size = 100
num_bijectors = 5
# 1e4 is a float; anything that needs an integer count (e.g. range()) must convert it.
train_iters = 1e4

In [None]:
# Map the current `sample` forward and export it.
# NOTE(review): `sample` is whatever was bound last (the accept/reject result when
# run top to bottom); confirm that it carries `weights`. The output path is the
# same one the earlier export cell wrote to, so that file is overwritten.
mapped_sample = Sample(data=rambo_mapping.map(sample.data), weights=sample.weights)
export_hepmc(100., mapped_sample, "../samples/qcd/2-5/realnvp.hepmc")