reduce overhead for simple models #1174

Open
lukasheinrich opened this issue Nov 11, 2020 · 2 comments
Labels
research experimental stuff

Comments

@lukasheinrich
Contributor

lukasheinrich commented Nov 11, 2020

Description

pyhf.simplemodels.hepdata_like is used by a lot of people, but for very simple models like this one it carries quite a bit of overhead compared to a manual calculation.

This also matters for toys, which are often used in single-bin analyses (see #1161).

import jax
import jax.numpy as jnp
import numpy as np
from scipy.special import gammaln

import pyhf

m = pyhf.simplemodels.hepdata_like([5],[50],[6])
d = [50]+m.config.auxdata
p = m.config.suggested_init()

# jit-compile the pyhf logpdf (assumes the jax backend is active)
m.logpdf = jax.jit(m.logpdf)

jp,jd = (jnp.asarray(p),jnp.asarray(d))

def poisson_logpdf(n, lam):
    return n * np.log(lam) - lam - gammaln(n + 1.0)

def logpdf(p,d):
    # hand-written logpdf for the same single-bin model (rate mu*s + gamma*b)
    s = 5
    b = 50
    db = 6
    tau = (b/db)**2
    mu,gamma = p
    obs, aux = d
    return poisson_logpdf(obs,mu*s + gamma*b) + poisson_logpdf(aux,tau*gamma)
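For reference, a jit-compatible version of the manual calculation can be written with jax.numpy and jax.scipy.special.gammaln; the jax_poisson_logpdf / jax_logpdf names and the hard-coded 5/50/6 values below are only an illustrative sketch of how the jitted pyhf logpdf and the manual one could be compared:

from jax.scipy.special import gammaln as jax_gammaln

def jax_poisson_logpdf(n, lam):
    return n * jnp.log(lam) - lam - jax_gammaln(n + 1.0)

@jax.jit
def jax_logpdf(p, d):
    # same hard-coded single-bin model: s=5, b=50, db=6
    s, b, db = 5.0, 50.0, 6.0
    tau = (b / db) ** 2
    mu, gamma = p
    obs, aux = d
    return jax_poisson_logpdf(obs, mu * s + gamma * b) + jax_poisson_logpdf(aux, tau * gamma)

# compare values and timings, e.g. in a notebook:
# m.logpdf(jp, jd), jax_logpdf(jp, jd)
# %timeit m.logpdf(jp, jd)
# %timeit jax_logpdf(jp, jd).block_until_ready()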


@matthewfeickert added the research experimental stuff label Nov 11, 2020
@lukasheinrich
Contributor Author

A few comparisons:

  • standard pyhf model m
  • pyhf.tensorlib/pyhf.probability-based model m2
  • pyhf.modifiers + pyhf.constraints-based model m3
  • pure numpy model m4

It seems that without too much effort we can get to m3, and with some more effort to m2.

import numpy as np
import scipy.stats
from jax.scipy.special import gammaln as jax_gammaln
from scipy.special import gammaln as gammaln
import jax
import pyhf
import jax.numpy as jnp
import numba
import pyhf.probability
from pyhf.tensor.common import _TensorViewer
pyhf.set_backend('numpy')

class Model(object):
    def __init__(self,s,b,db):
        self.s = s
        self.b = b
        self.db = db
        self.tau = (b/db)**2
        self.tv = _TensorViewer([np.array([0]),np.array([1])])
        
    def logpdf(self,p,d):
        mu,gamma = p
        a = pyhf.probability.Poisson(np.array([mu*self.s + self.b*gamma]))
        b = pyhf.probability.Poisson(np.array([self.tau*gamma]))
        pdf = pyhf.probability.Simultaneous([a,b], self.tv)
        return pdf.log_prob(d)

class PyhfBasedModel(object):
    def __init__(self,nominal,config):
        self.nominal = nominal
        # `mods` is the combined modifier data built from the spec
        # (see _nominal_and_modifiers_from_spec in the follow-up comment)
        self.norm  = pyhf.modifiers.normfactor_combined([('mu','normfactor')],config,mods)
        self.shape = pyhf.modifiers.shapesys_combined([('uncorr_bkguncrt','shapesys')],config,mods)
        self.pois = pyhf.constraints.poisson_constraint_combined(config)
        self.tv = _TensorViewer([np.array([0]),np.array([1])])
    def logpdf(self,p,d):
        a = pyhf.probability.Poisson(np.sum(self.nominal * self.shape.apply(p) * self.norm.apply(p),axis=1))
        b = self.pois.make_pdf(p)
        return pyhf.probability.Simultaneous([a,b],self.tv).log_prob(d)

    
def poisson_logpdf(n, lam):
    return n * np.log(lam) - lam - gammaln(n + 1.0)

class PureModel(object):
    def __init__(self,s,b,db):
        self.s = s
        self.b = b
        self.db = db
        self.tau = (b/db)**2
    def logpdf(self,p,d):
        mu,gamma = p
        obs, aux = d
        return poisson_logpdf(obs,mu*self.s + self.b*gamma) + poisson_logpdf(aux,self.tau*gamma)    

    
m = pyhf.simplemodels.hepdata_like([5],[50],[6])
d = [50]+m.config.auxdata

Np,Nd = (np.asarray([2.0,0.2]),np.asarray(d))


m2 = Model(5,50,6)

m3 = PyhfBasedModel(m.nominal_rates,m.config)

m4 = PureModel(5,50,6)
m2.logpdf(Np,Nd),m.logpdf(Np,Nd),m3.logpdf(Np,Nd),m4.logpdf(Np,Nd)
(array([-77.94252148]),
 array([-77.94252148]),
 array([[[-77.94252148]]]),
 -77.94252147644127)
%%timeit
m.logpdf(Np,Nd)
129 µs ± 4.55 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%%timeit
m2.logpdf(Np,Nd)
20.9 µs ± 563 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%%timeit
m3.logpdf(Np,Nd)
81.7 µs ± 1.66 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%%timeit
m4.logpdf(Np,Nd)
7.76 µs ± 55.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

@lukasheinrich
Contributor Author

lukasheinrich commented Nov 12, 2020

Models written like this

from functools import reduce
# helper imports (assumed module locations in pyhf at the time)
from pyhf.tensor.common import _tensorviewer_from_sizes
from pyhf.pdf import _nominal_and_modifiers_from_spec

class TensorProbModel(object):
    def __init__(self,s,b,db,config):
        self.config = config
        self.s = s
        self.b = b
        self.db = db
        self.tau = (b/db)**2
        self.nominal = pyhf.tensorlib.astensor([self.s,self.b])

        sizes = []
        sizes.append(self.config.nmaindata)
        sizes.append(self.config.nauxdata)
        self.tv = _tensorviewer_from_sizes(sizes, ['main', 'aux'], batch_size = None)

    def make_pdf(self,p):
        mu,gamma = p
        factors  = [pyhf.tensorlib.astensor([mu,1]),pyhf.tensorlib.astensor([1,gamma])]
        expected = sum(reduce(lambda s,v: s*v,factors,self.nominal))
        a = pyhf.probability.Poisson(pyhf.tensorlib.astensor([expected]))
        
        expected_constraint = [self.tau*gamma]
        b = pyhf.probability.Poisson(pyhf.tensorlib.astensor(expected_constraint))
        pdf = pyhf.probability.Simultaneous([a,b], self.tv)
        return pdf

    def logpdf(self,p,d):
        pdf = self.make_pdf(p)
        return pdf.log_prob(d)

class PyhfBasedModel(object):
    def __init__(self,spec,config):
        self.config = config
        mods,self.nominal = _nominal_and_modifiers_from_spec(config,spec)
        self.norm  = pyhf.modifiers.normfactor_combined([('mu','normfactor')],self.config,mods)
        self.shape = pyhf.modifiers.shapesys_combined([('uncorr_bkguncrt','shapesys')],self.config,mods)
        self.pois  = pyhf.constraints.poisson_constraint_combined(config)
        sizes = []
        sizes.append(self.config.nmaindata)
        sizes.append(self.config.nauxdata)
        self.tv = _tensorviewer_from_sizes(sizes, ['main', 'aux'], batch_size = None)
    def make_pdf(self,p):
        p = pyhf.tensorlib.astensor(p)
        summed = self.nominal[0,:,0,:]
        factors = [y[:,0,:] for x in [self.shape.apply(p), self.norm.apply(p)] for y in x] 
        expected = sum(reduce(lambda s,v: s*v,factors,summed))
        a = pyhf.probability.Independent(pyhf.probability.Poisson(expected))
        b = self.pois.make_pdf(p)
        pdf = pyhf.probability.Simultaneous([a,b],self.tv)
        return pdf
    
    def logpdf(self,p,d):
        return self.make_pdf(p).log_prob(d).reshape(1,)

class PureModel(object):
    def __init__(self,s,b,db,config):
        self.config = config
        self.s = s
        self.b = b
        self.db = db
        self.tau = (b/db)**2
        self.nominal = pyhf.tensorlib.astensor([self.s,self.b])
        sizes = []
        sizes.append(self.config.nmaindata)
        sizes.append(self.config.nauxdata)
        self.tv = _tensorviewer_from_sizes(sizes, ['main', 'aux'], batch_size = None)

    def make_pdf(self,p):
        mu,gamma = p
        factors  = [pyhf.tensorlib.astensor([mu,1]),pyhf.tensorlib.astensor([1,gamma])]
        expected = sum(reduce(lambda s,v: s*v,factors,self.nominal))
        a = pyhf.probability.Poisson(pyhf.tensorlib.astensor([expected]))
        
        expected_constraint = [self.tau*gamma]
        b = pyhf.probability.Poisson(pyhf.tensorlib.astensor(expected_constraint))
        pdf = pyhf.probability.Simultaneous([a,b], self.tv)
        return pdf
    
    def logpdf(self,p,d):
        mu,gamma = p
        obs, aux = d
        return (
            pyhf.tensorlib.poisson_logpdf(obs,mu*self.s + self.b*gamma) + pyhf.tensorlib.poisson_logpdf(aux,self.tau*gamma)
        ).reshape(1,)

    

can be dropped into pyhf.infer.hypotest with the toy calculator without any changes, achieving 3-5x the toy throughput.

With pyhf.set_backend('numpy', 'minuit') it is faster still.
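For concreteness, a minimal sketch of that kind of drop-in usage (assumption: the calctype='toybased' and ntoys keyword arguments shown are the ones pyhf.infer.hypotest forwards to the toy calculator; exact spellings may differ between pyhf versions):

pyhf.set_backend('numpy', 'minuit')

m = pyhf.simplemodels.hepdata_like([5], [50], [6])
data = [50] + m.config.auxdata

# drop in the hand-written model from the snippet above instead of the full pyhf model
fast_model = PureModel(5, 50, 6, m.config)

cls_obs = pyhf.infer.hypotest(1.0, data, fast_model, calctype='toybased', ntoys=1000)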
