# Slot 1 IPS

Training on all slot data, but (progressive validation) evaluation only on slot 1 using ips estimator.

In [36]:
class EasyMean:
    def __init__(self):
        self.n = 0
        self.partials = []

    def __iadd__(self, x):
        self.n += 1
        
        i = 0
        for y in self.partials:
            if abs(x) < abs(y):
                x, y = y, x
            hi = x + y
            lo = y - (hi - x)
            if lo:
                self.partials[i] = lo
                i += 1
            x = hi
        self.partials[i:] = [x]
        
        return self
    
    def __isub__(self, x):
        return self.__iadd__(-x)

    def mean(self):
        return sum(self.partials, 0.0) / max(self.n, 1)
    
class EasyAcc:
    def __init__(self):
        self.n = 0
        self.damean = EasyMean()
        self.dameansq = EasyMean()
        
    def __iadd__(self, x):
        self.n += 1
        self.damean += x
        self.dameansq += x * x
        return self
        
    def __isub__(self, x):
        self.n += 1
        self.damean -= x
        self.dameansq += x * x
        return self
        
    def mean(self):
        return self.damean.mean()
        
    def meanvar(self):
        from math import sqrt
        
        mux = self.damean.mean()
        muxsq = self.dameansq.mean()
        
        return mux, sqrt(muxsq - mux * mux) / sqrt(max(1, self.n))

def forkit(func, args, kwds):
    from multiprocessing import Pool
    
    pool = Pool(processes=1)
    return pool.apply(func, args, kwds)
    
def ccb_eval_slot1(data, veedub):
    from vowpalwabbit import pyvw
    import gzip
    
    vw = pyvw.vw(veedub)
    
    print('{:<6s}\t{:<7s}\t{:<6s}\t{:<22s}\t{:<9s}\t{:<9s}\t{:<9s}\t{:<9s}'.format(
                     'n', 'nhit', 'predict', 'cost', 'since last', 'online', 'sincelastonline', 'sincelastphit'
                ),
               flush=True)
    
    ipscost, sincelastipscost, online, sincelastonline, sincelastphit = (EasyAcc() for _ in range(5))
    nhit = 0
    
    for n, ex in enumerate(gzip.open(data)):
        from pprint import pformat
        
        stringex = "".join(map(chr, ex))
        objex = json.loads(stringex)
        chosenzero = objex['_outcomes'][0]['_a'][0]
        chosenp = objex['_outcomes'][0]['_p'][0]
        chosencost = objex['_outcomes'][0]['_label_cost']
        pred = vw.predict(stringex)
        predzero = pred[0][0][0]
        vw.learn(stringex)
        
        online += chosencost
        sincelastonline += chosencost
        
        policycost = chosencost / chosenp if predzero == chosenzero else 0
        ipscost += policycost
        sincelastipscost += policycost
        
        sincelastphit += 1 if predzero == chosenzero else 0
        nhit += 1 if predzero == chosenzero else 0

        if ipscost.n & (ipscost.n - 1) == 0:
            print('{:<6d}\t{:<7d}\t{:<6d}\t{:<9.5f} +/- {:<9.5f}\t{:<9.5f}\t{:<9.5f}\t{:<9.5f}\t{:<9.5f}'.format(
                        ipscost.n,
                        nhit,
                        predzero,
                        ipscost.mean(),
                        2 * ipscost.meanvar()[1],
                        sincelastipscost.mean(),
                        online.mean(),
                        sincelastonline.mean(),
                        sincelastphit.mean(),
                 ),
                 flush=True)
            sincelastipscost, sincelastonline, sincelastpa = (EasyAcc() for _ in range(3))
                
    print('{:<6d}\t{:<7d}\t{:<6d}\t{:<9.5f} +/- {:<9.5f}\t{:<9.5f}\t{:<9.5f}\t{:<9.5f}\t{:<9.5f}'.format(
                ipscost.n,
                nhit,
                predzero,
                ipscost.mean(),
                2 * ipscost.meanvar()[1],
                sincelastipscost.mean(),
                online.mean(),
                sincelastonline.mean(),
                sincelastphit.mean(),
            ),
            flush=True)  

## Original Loop Parameters

In [3]:
forkit(ccb_eval_slot1, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz', 
    'veedub': '--dsjson --ccb_explore_adf --epsilon 0 --cb_type mtr -l 1e-5 --power_t 0 --clip_p 0.2 -q cu -q cp -q cb -q du -q dp -q db -q eu -q ep -q eb --l1 1e-8 -b 20'
})

n     	nhit   	predict	cost                  	since last	online   	sincelastonline	sincelastphit
1     	0      	0     	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.00000  
2     	0      	0     	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.00000  
4     	0      	29    	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.00000  
8     	1      	23    	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.12500  
16    	1      	12    	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.06250  
32    	1      	0     	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.03125  
64    	2      	1     	0.00000   +/- 0.00000  	0.00000  	-0.00013 	-0.00025 	0.03125  
128   	5      	4     	0.00000   +/- 0.00000  	0.00000  	-0.00016 	-0.00019 	0.03906  
256   	8      	0     	-0.00469  +/- 0.00936  	-0.00937 	-0.00048 	-0.00080 	0.03125  
512   	18     	11    	-0.00626  +/- 0.00910  	-0.00782 	-0.00070 	-0.00093 	0.03516  
1024  	34     	27    	-0.00315  +/- 0.00456

## 5/7 Optimized Hyperparameters

In [39]:
forkit(ccb_eval_slot1, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz',
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 -l 0.0001 --cb_type ips -q bc -q pc -q be -q uc -q bd'
})

n     	nhit   	predict	cost                  	since last	online   	sincelastonline	sincelastphit
1     	0      	0     	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.00000  
2     	0      	0     	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.00000  
4     	0      	29    	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.00000  
8     	1      	23    	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.12500  
16    	1      	12    	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.06250  
32    	1      	0     	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.03125  
64    	2      	1     	-0.01875  +/- 0.03721  	-0.03750 	-0.00013 	-0.00025 	0.03125  
128   	4      	1     	-0.00937  +/- 0.01868  	0.00000  	-0.00016 	-0.00019 	0.03125  
256   	7      	1     	-0.01406  +/- 0.01614  	-0.01875 	-0.00048 	-0.00080 	0.02734  
512   	9      	1     	-0.00703  +/- 0.00810  	0.00000  	-0.00070 	-0.00093 	0.01758  
1024  	11     	1     	-0.00469  +/- 0.00468

## 5/11 Coin Hyperparameters 

In [2]:
forkit(ccb_eval_slot1, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --coin --cb_type ips -q bd -q ue -q bc -q uc -q ud'
})

n     	nhit   	predict	cost                  	since last	online   	sincelastonline	sincelastphit
1     	0      	0     	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.00000  
2     	0      	0     	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.00000  
4     	0      	29    	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.00000  
8     	1      	23    	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.12500  
16    	1      	12    	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.06250  
32    	1      	0     	0.00000   +/- 0.00000  	0.00000  	0.00000  	0.00000  	0.03125  
64    	3      	1     	-0.01875  +/- 0.03721  	-0.03750 	-0.00013 	-0.00025 	0.04688  
128   	5      	0     	-0.00937  +/- 0.01868  	0.00000  	-0.00016 	-0.00019 	0.03906  
256   	13     	19    	-0.00937  +/- 0.01321  	-0.00937 	-0.00048 	-0.00080 	0.05078  
512   	32     	11    	-0.00470  +/- 0.00662  	-0.00003 	-0.00070 	-0.00093 	0.06250  
1024  	40     	25    	-0.00235  +/- 0.00331

# Slot 1, Empirical Likelihood

Training on all slot data, but (progressive validation) evaluation only on slot 1 using empirical likelihood confidence intervals.

In [60]:
def estimate(datagen, wmin, wmax, rmin=0, rmax=1, raiseonerr=False, censored=False):
    import numpy as np
    from scipy.optimize import brentq

    assert wmin >= 0
    assert wmin < 1
    assert wmax > 1
    assert rmax >= rmin

    num = sum(c for c, w, r in datagen())
    assert num >= 1

    # solve dual

    def sumofw(beta):
        return sum((c * w)/((w - 1) * beta + num)
                   for c, w, _ in datagen()
                   if c > 0)

    # fun fact about the MLE:
    #
    # if \frac{1}{n} \sum_n w_n < 1 then \beta^* wants to be negative
    # but as wmax \to \infty, lower bound on \beta^* is 0
    # therefore the estimate becomes
    #
    # \hat{V}(\pi) = \left( \frac{1}{n} \sum_n w_n r_n \right) +
    #                \left( 1 - \frac{1}{n} \sum_n w_n \right) \rho
    #
    # where \rho is anything between rmin and rmax

    def graddualobjective(beta):
        return sum(c * (w - 1)/((w - 1) * beta + num)
                   for c, w, _ in datagen()
                   if c > 0)

    betamax = min( ((num - c) / (1 - w)
                    for c, w, _ in datagen()
                    if w < 1 and c > 0 ),
                   default=num / (1 - wmin))
    betamax = min(betamax, num / (1 - wmin))

    betamin = max( ((num - c) / (1 - w)
                    for c, w, _ in datagen()
                    if w > 1 and c > 0 ),
                   default=num / (1 - wmax))
    betamin = max(betamin, num / (1 - wmax))

    gradmin = graddualobjective(betamin)
    gradmax = graddualobjective(betamax)
    if gradmin * gradmax < 0:
        betastar = brentq(f=graddualobjective, a=betamin, b=betamax)
    elif gradmin < 0:
        betastar = betamin
    else:
        betastar = betamax

    remw = max(0.0, 1.0 - sumofw(betastar))

    if censored:
        vnumhat = 0
        vdenomhat = 0

        for c, w, r in datagen():
            if c > 0:
                if r is not None:
                    vnumhat += w*r* c/((w - 1) * betastar + num)
                    vdenomhat += w*1* c/((w - 1) * betastar + num)

        if np.allclose(vdenomhat, 0):
            vhat = vmin = vmax = None
        else:
            vnummin = vnumhat + remw * rmin
            vdenommin = vdenomhat + remw
            vmin = min([ vnummin / vdenommin, vnumhat / vdenomhat ])

            vnummax = vnumhat + remw * rmax
            vdenommax = vdenomhat + remw
            vmax = max([ vnummax / vdenommax, vnumhat / vdenomhat ])

            vhat = 0.5*(vmin + vmax)
    else:
        vhat = 0
        for c, w, r in datagen():
            if c > 0:
                vhat += w*r* c/((w - 1) * betastar + num)

        vmin = vhat + remw * rmin
        vmax = vhat + remw * rmax
        vhat += remw * (rmin + rmax) / 2.0

    return vhat, {
            'betastar': betastar,
            'vmin': vmin,
            'vmax': vmax,
            'num': num,
            'qfunc': lambda c, w, r: c / (num + betastar * (w - 1)),
           }

def asymptoticconfidenceinterval(datagen, wmin, wmax, alpha=0.05, rmin=0, rmax=1, raiseonerr=False):
    from scipy.special import xlogy
    from scipy.stats import f
    from math import exp, log
    import numpy as np

    assert wmin >= 0
    assert wmin < 1
    assert wmax > 1
    assert rmax >= rmin

    vhat, qmle = estimate(datagen=datagen, wmin=wmin, wmax=wmax,
                          rmin=rmin, rmax=rmax, raiseonerr=raiseonerr)
    num = qmle['num']
    if num < 2:
        return ((rmin, rmax), (None, None))
    betamle = qmle['betastar']

    Delta = 0.5 * f.isf(q=alpha, dfn=1, dfd=num-1)

    sumwsq = sum(c * w * w for c, w, _ in datagen())
    wscale = max(1.0, np.sqrt(sumwsq / num))
    rscale = max(1.0, np.abs(rmin), np.abs(rmax))

    # solve dual

    tiny = 1e-5
    logtiny = log(tiny)

    def safedenom(x):
        return x if x > tiny else exp(logstar(x))

    def logstar(x):
        return log(x) if x > tiny else -1.5 + logtiny + 2.0*(x/tiny) - 0.5*(x/tiny)*(x/tiny)

    def jaclogstar(x):
        return 1/x if x > tiny else (2.0 - (x/tiny))/tiny

    def hesslogstar(x):
        return -1/(x*x) if x > tiny else -1/(tiny*tiny)

    def dualobjective(p, sign):
        gamma, beta = p
        logcost = -Delta

        n = 0
        for c, w, r in datagen():
            if c > 0:
                n += c
                denom = gamma + (beta + sign * wscale * r) * (w / wscale)
                mledenom = num + betamle * (w - 1)
                logcost += c * (logstar(denom) - logstar(mledenom))

        assert n == num

        if n > 0:
            logcost /= n

        return (-n * exp(logcost) + gamma + beta / wscale) / rscale

    def jacdualobjective(p, sign):
        gamma, beta = p
        logcost = -Delta
        jac = np.zeros_like(p)

        n = 0
        for c, w, r in datagen():
            if c > 0:
                n += c
                denom = gamma + (beta + sign * wscale * r) * (w / wscale)
                mledenom = num + betamle * (w - 1)
                logcost += c * (logstar(denom) - logstar(mledenom))

                jaclogcost = c * jaclogstar(denom)
                jac[0] += jaclogcost
                jac[1] += jaclogcost * (w / wscale)

        assert n == num

        if n > 0:
            logcost /= n
            jac /= n

        jac *= -(n / rscale) * exp(logcost)
        jac[0] += 1 / rscale
        jac[1] += 1 / (wscale * rscale)

        return jac

    def hessdualobjective(p, sign):
        gamma, beta = p
        logcost = -Delta
        jac = np.zeros_like(p)
        hess = np.zeros((2,2))

        n = 0
        for c, w, r in datagen():
            if c > 0:
                n += c
                denom = gamma + (beta + sign * wscale * r) * (w / wscale)
                mledenom = num + betamle * (w - 1)
                logcost += c * (logstar(denom) - logstar(mledenom))

                jaclogcost = c * jaclogstar(denom)
                jac[0] += jaclogcost
                jac[1] += jaclogcost * (w / wscale)

                hesslogcost = c * hesslogstar(denom)
                hess[0][0] += hesslogcost
                hess[0][1] += hesslogcost * (w / wscale)
                hess[1][1] += hesslogcost * (w / wscale) * (w / wscale)

        assert n == num

        if n > 0:
            logcost /= n
            jac /= n
            hess /= n

        hess[1][0] = hess[0][1]
        hess += np.outer(jac, jac)
        hess *= -(n / rscale) * exp(logcost)

        return hess

    consE = np.array([
        [ 1, w / wscale ]
        for w in (wmin, wmax)
        for r in (rmin, rmax)
    ], dtype='float64')

    retvals = []

    easybounds = [ (qmle['vmin'] <= rmin + tiny, rmin),
                   (qmle['vmax'] >= rmax - tiny, rmax) ]
    for what in range(2):
        if easybounds[what][0]:
            retvals.append((easybounds[what][1], None))
            continue

        sign = 1 - 2 * what
        d = np.array([ -sign*w*r + tiny
                       for w in (wmin, wmax)
                       for r in (rmin, rmax)
                     ],
                     dtype='float64')

        minsr = min(sign*rmin, sign*rmax)
        gamma0, beta0 = ( num - qmle['betastar'] + 2 * tiny,
                          wscale * (qmle['betastar'] - (1 + 1 / wscale) * minsr)
                        )

        x0 = np.array([ gamma0, beta0 ])

        if raiseonerr:
           active = np.nonzero(consE.dot(x0) - d < 0)[0]
           from pprint import pformat
           assert active.size == 0, pformat({
                   'cons': consE.dot(x0) - d,
                   'd': d,
                   'consE.dot(x0)': consE.dot(x0),
                   'active': active,
                   'x0': x0,
                   'qstarnocv[{}]'.format(what): qstarnocv[what],
               })

#        from .gradcheck import gradcheck, hesscheck
#        gradcheck(f=lambda p: dualobjective(p, sign),
#                  jac=lambda p: jacdualobjective(p, sign),
#                  x=x0,
#                  what='dualobjective')
#
#        hesscheck(jac=lambda p: jacdualobjective(p, sign),
#                  hess=lambda p: hessdualobjective(p, sign),
#                  x=x0,
#                  what='jacdualobjective')

        # NB: things i've tried
        #
        # scipy.minimize method='slsqp': 3.78 it/s, sometimes fails
        # sqp with quadprog: 1.75 it/s, sometimes fails
        # sqp with cvxopt.qp: 1.05 s/it, reliable
        # cvxopt.cp: 1.37 s/it, reliable <= seems most trustworthy
        # minimize_ipopt: 4.85 s/it, reliable

##       from ipopt import minimize_ipopt
##       optresult = minimize_ipopt(
##                           options={
##                              'tol': 1e-12,
#        from scipy.optimize import minimize
#        optresult = minimize(method='slsqp',
#                             options={
#                               'ftol': 1e-12,
#                               'maxiter': 1000,
#                            },
#                            fun=dualobjective,
#                            x0=x0,
#                            args=(sign,),
#                            jac=jacdualobjective,
#                            #hess=hessdualobjective,
#                            constraints=[{
#                                'type': 'ineq',
#                                'fun': lambda x: consE.dot(x) - d,
#                                'jac': lambda x: consE
#                            }],
#                   )
#        if raiseonerr:
#            from pprint import pformat
#            assert optresult.success, pformat(optresult)
#
#        fstar, xstar = optresult.fun, optresult.x

#        from .sqp import sqp
#        fstar, xstar = sqp(
#                f=lambda p: dualobjective(p, sign),
#                gradf=lambda p: jacdualobjective(p, sign),
#                hessf=lambda p: hessdualobjective(p, sign),
#                E=consE,
#                d=d,
#                x0=x0,
#                strict=True,
#                condfac=1e-9,
#        )

        from cvxopt import solvers, matrix
        def F(x=None, z=None):
            if x is None: return 0, matrix(x0)
            p = np.reshape(np.array(x), -1)
            f = dualobjective(p, sign)
            jf = jacdualobjective(p, sign)
            Df = matrix(jf).T
            if z is None: return f, Df
            hf = z[0] * hessdualobjective(p, sign)
            H = matrix(hf, hf.shape)
            return f, Df, H

        soln = solvers.cp(F,
                          G=-matrix(consE, consE.shape),
                          h=-matrix(d),
                          options={'show_progress': False})

        if raiseonerr:
            from pprint import pformat
            assert soln['status'] == 'optimal', pformat(soln)

        xstar = soln['x']
        fstar = soln['primal objective']

        gammastar = xstar[0]
        betastar = xstar[1] / wscale
        kappastar = (-rscale * fstar + gammastar + betastar) / num

        qfunc = lambda c, w, r, kappa=kappastar, gamma=gammastar, beta=betastar, s=sign: kappa * c / (gamma + (beta + s * r) * w)

        vbound = -sign * rscale * fstar

        retvals.append(
           (vbound,
            {
                'gammastar': gammastar,
                'betastar': betastar,
                'kappastar': kappastar,
                'qfunc': qfunc,
            })
        )

    return (retvals[0][0], retvals[1][0]), (retvals[0][1], retvals[1][1])

class EasyMean:
    def __init__(self):
        self.n = 0
        self.partials = []

    def __iadd__(self, x):
        self.n += 1
        
        i = 0
        for y in self.partials:
            if abs(x) < abs(y):
                x, y = y, x
            hi = x + y
            lo = y - (hi - x)
            if lo:
                self.partials[i] = lo
                i += 1
            x = hi
        self.partials[i:] = [x]
        
        return self
    
    def __isub__(self, x):
        return self.__iadd__(-x)

    def mean(self):
        return sum(self.partials, 0.0) / max(self.n, 1)
    
class EasyAcc:
    def __init__(self):
        self.n = 0
        self.damean = EasyMean()
        self.dameansq = EasyMean()
        
    def __iadd__(self, x):
        self.n += 1
        self.damean += x
        self.dameansq += x * x
        return self
        
    def __isub__(self, x):
        self.n += 1
        self.damean -= x
        self.dameansq += x * x
        return self
        
    def mean(self):
        return self.damean.mean()
        
    def meanvar(self):
        from math import sqrt
        
        mux = self.damean.mean()
        muxsq = self.dameansq.mean()
        
        return mux, sqrt(muxsq - mux * mux) / sqrt(max(1, self.n))

def forkit(func, args, kwds):
    from multiprocessing import Pool
    
    pool = Pool(processes=1)
    return pool.apply(func, args, kwds)
    
def ccb_eval_slot1_mle(data, veedub):
    from vowpalwabbit import pyvw
    import gzip
    
    vw = pyvw.vw(veedub)
    
    print('{:<6s}\t{:<7s}\t{:<6s}\t{:<28s}\t{:<28s}\t{:<9s}\t{:<9s}'.format(
                     'n', 'nhit', 'predict', 'cost', 'since last', 'online', 'sincelastonline'
                ),
               flush=True)
    
    ipscost, sincelastipscost, online, sincelastonline, sincelastphit = (EasyAcc() for _ in range(5))
    nhit = 0
    
    alldata = []
    sincelastdata = []
    
    for n, ex in enumerate(gzip.open(data)):
        from pprint import pformat
        
        stringex = "".join(map(chr, ex))
        objex = json.loads(stringex)
        chosenzero = objex['_outcomes'][0]['_a'][0]
        chosenp = objex['_outcomes'][0]['_p'][0]
        chosencost = objex['_outcomes'][0]['_label_cost']
        pred = vw.predict(stringex)
        predzero = pred[0][0][0]
        vw.learn(stringex)
        
        online += chosencost
        sincelastonline += chosencost
        
        policycost = chosencost / chosenp if predzero == chosenzero else 0
        ipscost += policycost
        sincelastipscost += policycost
        
        sincelastphit += 1 if predzero == chosenzero else 0
        nhit += 1 if predzero == chosenzero else 0
        
        w = 1 / chosenp if predzero == chosenzero else 0
        r = -chosencost
        alldata.append((1, w, r))
        sincelastdata.append((1, w, r))

        if online.n & (online.n - 1) == 0:
            allestimate = asymptoticconfidenceinterval(datagen = lambda: alldata, wmin=0, wmax=30/0.2)
            allmle = estimate(datagen = lambda: alldata, wmin=0, wmax=30/0.2)
            sincelastestimate = asymptoticconfidenceinterval(datagen = lambda: sincelastdata, wmin=0, wmax=30/0.2)
            sincelastmle = estimate(datagen = lambda: sincelastdata, wmin=0, wmax=30/0.2)
            
            print('{:<6d}\t{:<7d}\t{:<6d}\t[{:.5f},{:.5f},{:.5f}]\t[{:.5f},{:.5f},{:.5f}]\t{:<9.5f}\t{:<9.5f}'.format(
                        online.n,
                        nhit,
                        predzero,
                        -allestimate[0][1],
                        -allmle[0],
                        -allestimate[0][0],
                        -sincelastestimate[0][1],
                        -sincelastmle[0],
                        -sincelastestimate[0][0],
                        online.mean(),
                        sincelastonline.mean(),
                 ),
                 flush=True)
            sincelastipscost, sincelastonline, sincelastpa = (EasyAcc() for _ in range(3))
            sincelastdata = []

    allestimate = asymptoticconfidenceinterval(datagen = lambda: alldata, wmin=0, wmax=30/0.2)
    sincelastestimate = asymptoticconfidenceinterval(datagen = lambda: sincelastdata, wmin=0, wmax=30/0.2)
    print('{:<6d}\t{:<7d}\t{:<6d}\t[{:.5f},{:.5f},{:.5f}]\t[{:.5f},{:.5f},{:.5f}]\t{:<9.5f}\t{:<9.5f}'.format(
                online.n,
                nhit,
                predzero,
                -allestimate[0][1],
                -allmle[0],
                -allestimate[0][0],
                -sincelastestimate[0][1],
                -sincelastmle[0],
                -sincelastestimate[0][0],
                online.mean(),
                sincelastonline.mean(),
            ),
            flush=True)  

## Original Loop Parameters

In [61]:
forkit(ccb_eval_slot1_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz',
    'veedub': '--dsjson --ccb_explore_adf --epsilon 0 --cb_type mtr -l 1e-5 --power_t 0 --clip_p 0.2 -q cu -q cp -q cb -q du -q dp -q db -q eu -q ep -q eb --l1 1e-8 -b 20'
})

n     	nhit   	predict	cost                        	since last                  	online   	sincelastonline
1     	0      	0     	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
2     	0      	0     	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
4     	0      	29    	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
8     	1      	23    	[-0.99620,-0.42240,0.00000]	[-0.99917,-0.34479,0.00000]	0.00000  	0.00000  
16    	1      	12    	[-0.99684,-0.46120,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
32    	1      	0     	[-0.99810,-0.48060,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
64    	2      	1     	[-0.99375,-0.48060,0.00000]	[-0.99810,-0.48060,0.00000]	-0.00013 	-0.00025 
128   	5      	4     	[-0.72914,-0.00000,0.00000]	[-0.70246,-0.00000,0.00000]	-0.00016 	-0.00019 
256   	8      	0     	[-0.35257,-0.00182,-0.00012]	[-0.48071,-0.00267,-0.00017]	-0.00048 	-0.00080 
512   	18

## 5/7 Optimized Hyperparameters

In [62]:
forkit(ccb_eval_slot1_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz',
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 -l 0.0001 --cb_type ips -q bc -q pc -q be -q uc -q bd'
})

n     	nhit   	predict	cost                        	since last                  	online   	sincelastonline
1     	0      	0     	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
2     	0      	0     	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
4     	0      	29    	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
8     	1      	23    	[-0.99620,-0.42240,0.00000]	[-0.99917,-0.34479,0.00000]	0.00000  	0.00000  
16    	1      	12    	[-0.99684,-0.46120,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
32    	1      	0     	[-0.99810,-0.48060,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
64    	2      	1     	[-0.84954,-0.00785,-0.00107]	[-0.87605,-0.00800,-0.00100]	-0.00013 	-0.00025 
128   	4      	1     	[-0.73803,-0.00639,-0.00088]	[-0.96037,-0.25749,0.00000]	-0.00016 	-0.00019 
256   	7      	1     	[-0.35666,-0.00547,-0.00199]	[-0.48210,-0.00533,-0.00126]	-0.00048 	-0.00080 
512   

## 5/11 Coin Hyperparameters

In [63]:
forkit(ccb_eval_slot1_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz',
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --coin --cb_type ips -q bc -q pc -q be -q uc -q bd'
})

n     	nhit   	predict	cost                        	since last                  	online   	sincelastonline
1     	0      	0     	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
2     	0      	0     	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
4     	0      	29    	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
8     	1      	23    	[-0.99620,-0.42240,0.00000]	[-0.99917,-0.34479,0.00000]	0.00000  	0.00000  
16    	1      	12    	[-0.99684,-0.46120,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
32    	1      	0     	[-0.99810,-0.48060,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
64    	2      	1     	[-0.84954,-0.00785,-0.00107]	[-0.87605,-0.00800,-0.00100]	-0.00013 	-0.00025 
128   	6      	24    	[-0.53259,-0.00328,-0.00024]	[-0.68812,-0.00000,0.00000]	-0.00016 	-0.00019 
256   	13     	19    	[-0.42108,-0.00236,-0.00018]	[-0.81043,-0.00006,-0.00000]	-0.00048 	-0.00080 
512   

# All Slots, Empirical Likelihood

Train on all slots, evaluate on all slots using EL-based MIS estimator.

In [5]:
def estimate(datagen, wmin, wmax, rmin=0, rmax=1, raiseonerr=False, censored=False):
    import numpy as np
    from scipy.optimize import brentq

    assert wmin >= 0
    assert wmin < 1
    assert wmax > 1
    assert rmax >= rmin

    num = sum(c for c, w, r in datagen())
    assert num >= 1

    # solve dual

    def sumofw(beta):
        return sum((c * w)/((w - 1) * beta + num)
                   for c, w, _ in datagen()
                   if c > 0)

    # fun fact about the MLE:
    #
    # if \frac{1}{n} \sum_n w_n < 1 then \beta^* wants to be negative
    # but as wmax \to \infty, lower bound on \beta^* is 0
    # therefore the estimate becomes
    #
    # \hat{V}(\pi) = \left( \frac{1}{n} \sum_n w_n r_n \right) +
    #                \left( 1 - \frac{1}{n} \sum_n w_n \right) \rho
    #
    # where \rho is anything between rmin and rmax

    def graddualobjective(beta):
        return sum(c * (w - 1)/((w - 1) * beta + num)
                   for c, w, _ in datagen()
                   if c > 0)

    betamax = min( ((num - c) / (1 - w)
                    for c, w, _ in datagen()
                    if w < 1 and c > 0 ),
                   default=num / (1 - wmin))
    betamax = min(betamax, num / (1 - wmin))

    betamin = max( ((num - c) / (1 - w)
                    for c, w, _ in datagen()
                    if w > 1 and c > 0 ),
                   default=num / (1 - wmax))
    betamin = max(betamin, num / (1 - wmax))

    gradmin = graddualobjective(betamin)
    gradmax = graddualobjective(betamax)
    if gradmin * gradmax < 0:
        betastar = brentq(f=graddualobjective, a=betamin, b=betamax)
    elif gradmin < 0:
        betastar = betamin
    else:
        betastar = betamax

    remw = max(0.0, 1.0 - sumofw(betastar))

    if censored:
        vnumhat = 0
        vdenomhat = 0

        for c, w, r in datagen():
            if c > 0:
                if r is not None:
                    vnumhat += w*r* c/((w - 1) * betastar + num)
                    vdenomhat += w*1* c/((w - 1) * betastar + num)

        if np.allclose(vdenomhat, 0):
            vhat = vmin = vmax = None
        else:
            vnummin = vnumhat + remw * rmin
            vdenommin = vdenomhat + remw
            vmin = min([ vnummin / vdenommin, vnumhat / vdenomhat ])

            vnummax = vnumhat + remw * rmax
            vdenommax = vdenomhat + remw
            vmax = max([ vnummax / vdenommax, vnumhat / vdenomhat ])

            vhat = 0.5*(vmin + vmax)
    else:
        vhat = 0
        for c, w, r in datagen():
            if c > 0:
                vhat += w*r* c/((w - 1) * betastar + num)

        vmin = vhat + remw * rmin
        vmax = vhat + remw * rmax
        vhat += remw * (rmin + rmax) / 2.0

    return vhat, {
            'betastar': betastar,
            'vmin': vmin,
            'vmax': vmax,
            'num': num,
            'qfunc': lambda c, w, r: c / (num + betastar * (w - 1)),
           }

def asymptoticconfidenceinterval(datagen, wmin, wmax, alpha=0.05, rmin=0, rmax=1, raiseonerr=False):
    from scipy.special import xlogy
    from scipy.stats import f
    from math import exp, log
    import numpy as np

    assert wmin >= 0
    assert wmin < 1
    assert wmax > 1
    assert rmax >= rmin

    vhat, qmle = estimate(datagen=datagen, wmin=wmin, wmax=wmax,
                          rmin=rmin, rmax=rmax, raiseonerr=raiseonerr)
    num = qmle['num']
    if num < 2:
        return ((rmin, rmax), (None, None))
    betamle = qmle['betastar']

    Delta = 0.5 * f.isf(q=alpha, dfn=1, dfd=num-1)

    sumwsq = sum(c * w * w for c, w, _ in datagen())
    wscale = max(1.0, np.sqrt(sumwsq / num))
    rscale = max(1.0, np.abs(rmin), np.abs(rmax))

    # solve dual

    tiny = 1e-5
    logtiny = log(tiny)

    def safedenom(x):
        return x if x > tiny else exp(logstar(x))

    def logstar(x):
        return log(x) if x > tiny else -1.5 + logtiny + 2.0*(x/tiny) - 0.5*(x/tiny)*(x/tiny)

    def jaclogstar(x):
        return 1/x if x > tiny else (2.0 - (x/tiny))/tiny

    def hesslogstar(x):
        return -1/(x*x) if x > tiny else -1/(tiny*tiny)

    def dualobjective(p, sign):
        gamma, beta = p
        logcost = -Delta

        n = 0
        for c, w, r in datagen():
            if c > 0:
                n += c
                denom = gamma + (beta + sign * wscale * r) * (w / wscale)
                mledenom = num + betamle * (w - 1)
                logcost += c * (logstar(denom) - logstar(mledenom))

        assert n == num

        if n > 0:
            logcost /= n

        return (-n * exp(logcost) + gamma + beta / wscale) / rscale

    def jacdualobjective(p, sign):
        gamma, beta = p
        logcost = -Delta
        jac = np.zeros_like(p)

        n = 0
        for c, w, r in datagen():
            if c > 0:
                n += c
                denom = gamma + (beta + sign * wscale * r) * (w / wscale)
                mledenom = num + betamle * (w - 1)
                logcost += c * (logstar(denom) - logstar(mledenom))

                jaclogcost = c * jaclogstar(denom)
                jac[0] += jaclogcost
                jac[1] += jaclogcost * (w / wscale)

        assert n == num

        if n > 0:
            logcost /= n
            jac /= n

        jac *= -(n / rscale) * exp(logcost)
        jac[0] += 1 / rscale
        jac[1] += 1 / (wscale * rscale)

        return jac

    def hessdualobjective(p, sign):
        gamma, beta = p
        logcost = -Delta
        jac = np.zeros_like(p)
        hess = np.zeros((2,2))

        n = 0
        for c, w, r in datagen():
            if c > 0:
                n += c
                denom = gamma + (beta + sign * wscale * r) * (w / wscale)
                mledenom = num + betamle * (w - 1)
                logcost += c * (logstar(denom) - logstar(mledenom))

                jaclogcost = c * jaclogstar(denom)
                jac[0] += jaclogcost
                jac[1] += jaclogcost * (w / wscale)

                hesslogcost = c * hesslogstar(denom)
                hess[0][0] += hesslogcost
                hess[0][1] += hesslogcost * (w / wscale)
                hess[1][1] += hesslogcost * (w / wscale) * (w / wscale)

        assert n == num

        if n > 0:
            logcost /= n
            jac /= n
            hess /= n

        hess[1][0] = hess[0][1]
        hess += np.outer(jac, jac)
        hess *= -(n / rscale) * exp(logcost)

        return hess

    consE = np.array([
        [ 1, w / wscale ]
        for w in (wmin, wmax)
        for r in (rmin, rmax)
    ], dtype='float64')

    retvals = []

    easybounds = [ (qmle['vmin'] <= rmin + tiny, rmin),
                   (qmle['vmax'] >= rmax - tiny, rmax) ]
    for what in range(2):
        if easybounds[what][0]:
            retvals.append((easybounds[what][1], None))
            continue

        sign = 1 - 2 * what
        d = np.array([ -sign*w*r + tiny
                       for w in (wmin, wmax)
                       for r in (rmin, rmax)
                     ],
                     dtype='float64')

        minsr = min(sign*rmin, sign*rmax)
        gamma0, beta0 = ( num - qmle['betastar'] + 2 * tiny,
                          wscale * (qmle['betastar'] - (1 + 1 / wscale) * minsr)
                        )

        x0 = np.array([ gamma0, beta0 ])

        if raiseonerr:
           active = np.nonzero(consE.dot(x0) - d < 0)[0]
           from pprint import pformat
           assert active.size == 0, pformat({
                   'cons': consE.dot(x0) - d,
                   'd': d,
                   'consE.dot(x0)': consE.dot(x0),
                   'active': active,
                   'x0': x0,
                   'qstarnocv[{}]'.format(what): qstarnocv[what],
               })

#        from .gradcheck import gradcheck, hesscheck
#        gradcheck(f=lambda p: dualobjective(p, sign),
#                  jac=lambda p: jacdualobjective(p, sign),
#                  x=x0,
#                  what='dualobjective')
#
#        hesscheck(jac=lambda p: jacdualobjective(p, sign),
#                  hess=lambda p: hessdualobjective(p, sign),
#                  x=x0,
#                  what='jacdualobjective')

        # NB: things i've tried
        #
        # scipy.minimize method='slsqp': 3.78 it/s, sometimes fails
        # sqp with quadprog: 1.75 it/s, sometimes fails
        # sqp with cvxopt.qp: 1.05 s/it, reliable
        # cvxopt.cp: 1.37 s/it, reliable <= seems most trustworthy
        # minimize_ipopt: 4.85 s/it, reliable

##       from ipopt import minimize_ipopt
##       optresult = minimize_ipopt(
##                           options={
##                              'tol': 1e-12,
#        from scipy.optimize import minimize
#        optresult = minimize(method='slsqp',
#                             options={
#                               'ftol': 1e-12,
#                               'maxiter': 1000,
#                            },
#                            fun=dualobjective,
#                            x0=x0,
#                            args=(sign,),
#                            jac=jacdualobjective,
#                            #hess=hessdualobjective,
#                            constraints=[{
#                                'type': 'ineq',
#                                'fun': lambda x: consE.dot(x) - d,
#                                'jac': lambda x: consE
#                            }],
#                   )
#        if raiseonerr:
#            from pprint import pformat
#            assert optresult.success, pformat(optresult)
#
#        fstar, xstar = optresult.fun, optresult.x

#        from .sqp import sqp
#        fstar, xstar = sqp(
#                f=lambda p: dualobjective(p, sign),
#                gradf=lambda p: jacdualobjective(p, sign),
#                hessf=lambda p: hessdualobjective(p, sign),
#                E=consE,
#                d=d,
#                x0=x0,
#                strict=True,
#                condfac=1e-9,
#        )

        from cvxopt import solvers, matrix
        def F(x=None, z=None):
            if x is None: return 0, matrix(x0)
            p = np.reshape(np.array(x), -1)
            f = dualobjective(p, sign)
            jf = jacdualobjective(p, sign)
            Df = matrix(jf).T
            if z is None: return f, Df
            hf = z[0] * hessdualobjective(p, sign)
            H = matrix(hf, hf.shape)
            return f, Df, H

        soln = solvers.cp(F,
                          G=-matrix(consE, consE.shape),
                          h=-matrix(d),
                          options={'show_progress': False})

        if raiseonerr:
            from pprint import pformat
            assert soln['status'] == 'optimal', pformat(soln)

        xstar = soln['x']
        fstar = soln['primal objective']

        gammastar = xstar[0]
        betastar = xstar[1] / wscale
        kappastar = (-rscale * fstar + gammastar + betastar) / num

        qfunc = lambda c, w, r, kappa=kappastar, gamma=gammastar, beta=betastar, s=sign: kappa * c / (gamma + (beta + s * r) * w)

        vbound = -sign * rscale * fstar

        retvals.append(
           (vbound,
            {
                'gammastar': gammastar,
                'betastar': betastar,
                'kappastar': kappastar,
                'qfunc': qfunc,
            })
        )

    return (retvals[0][0], retvals[1][0]), (retvals[0][1], retvals[1][1])

class EasyMean:
    def __init__(self):
        self.n = 0
        self.partials = []

    def __iadd__(self, x):
        self.n += 1
        
        i = 0
        for y in self.partials:
            if abs(x) < abs(y):
                x, y = y, x
            hi = x + y
            lo = y - (hi - x)
            if lo:
                self.partials[i] = lo
                i += 1
            x = hi
        self.partials[i:] = [x]
        
        return self
    
    def __isub__(self, x):
        return self.__iadd__(-x)

    def mean(self):
        return sum(self.partials, 0.0) / max(self.n, 1)
    
class EasyAcc:
    def __init__(self):
        self.n = 0
        self.damean = EasyMean()
        self.dameansq = EasyMean()
        
    def __iadd__(self, x):
        self.n += 1
        self.damean += x
        self.dameansq += x * x
        return self
        
    def __isub__(self, x):
        self.n += 1
        self.damean -= x
        self.dameansq += x * x
        return self
        
    def mean(self):
        return self.damean.mean()
        
    def meanvar(self):
        from math import sqrt
        
        mux = self.damean.mean()
        muxsq = self.dameansq.mean()
        
        return mux, sqrt(muxsq - mux * mux) / sqrt(max(1, self.n))

def forkit(func, args, kwds):
    from multiprocessing import Pool
    
    pool = Pool(processes=1)
    return pool.apply(func, args, kwds)
    
def ccb_eval_allslots_mle(data, veedub):
    from collections import defaultdict
    from vowpalwabbit import pyvw
    import gzip
    
    vw = pyvw.vw(veedub)
    
    print('{:<6s}\t{:<28s}\t{:<28s}\t{:<9s}\t{:<9s}\t{:<28s}\t{:<28s}\t{:<9s}\t{:<9s}'.format(
                     'n', 'all slot cost', 'since last', 'online', 'sincelast',
                     'slot1 cost', 'since last', 'slot1 online', 'sincelast'
                ),
               flush=True)
    
    online, sincelastonline, onlineslot1, sincelastslot1 = (EasyAcc() for _ in range(4))
    nhit = 0
    
    alldata = defaultdict(list)
    sincelastdata = defaultdict(list)
    wmaxperslot = defaultdict(float)
    
    for n, ex in enumerate(gzip.open(data)):
        from pprint import pformat
        
        stringex = "".join(map(chr, ex))
        objex = json.loads(stringex)
        if len(objex['_outcomes']) > 6:
            continue
            
        pred = vw.predict(stringex)

        w = 1
        total = 0
        for slot, outcome in enumerate(objex['_outcomes']):
            slotpred = pred[slot][0][0]
            chosenslot = outcome['_a'][0]
            chosenp = outcome['_p'][0]
            chosencost = outcome['_label_cost']
            
            wmaxperslot[slot] = max(wmaxperslot[slot], 1 / chosenp)
            w = w / chosenp if slotpred == chosenslot else 0
            r = -chosencost
            alldata[slot].append((1, w, r))
            sincelastdata[slot].append((1, w, r))
            
            total += chosencost
            
            if slot == 0:
                onlineslot1 += chosencost
                sincelastslot1 += chosencost
                
        online += total
        sincelastonline += total
            
        vw.learn(stringex)

        if online.n & (online.n - 1) == 0:
            lb, mid, ub = 0, 0, 0
            sllb, slmid, slub = 0, 0, 0
            wmax = 1
            for slot in sorted(alldata.keys()):
                wmax *= wmaxperslot[slot]             
                allestimate = asymptoticconfidenceinterval(datagen = lambda: alldata[slot], wmin=0, wmax=wmax)
                allmle = estimate(datagen = lambda: alldata[slot], wmin=0, wmax=wmax)
                lb += -allestimate[0][1]
                mid += -allmle[1]['vmin']
                ub += -allestimate[0][0]
                sincelastestimate = asymptoticconfidenceinterval(datagen = lambda: sincelastdata[slot], wmin=0, wmax=wmax)
                sincelastmle = estimate(datagen = lambda: sincelastdata[slot], wmin=0, wmax=wmax)
                sllb += -sincelastestimate[0][1]
                slmid += -sincelastmle[1]['vmin']
                slub += -sincelastestimate[0][0] 
                if slot == 0:
                    slot1lb, slot1mid, slot1ub = -allestimate[0][1], -allmle[0], -allestimate[0][0]
                    slot1sllb, slot1slmid, slot1slub = -sincelastestimate[0][1], -sincelastmle[0], -sincelastestimate[0][0] 
                
            print('{:<6d}\t[{:.5f},{:.5f},{:.5f}]\t[{:.5f},{:.5f},{:.5f}]\t{:<9.5f}\t{:<9.5f}\t[{:.5f},{:.5f},{:.5f}]\t[{:.5f},{:.5f},{:.5f}]\t{:<9.5f}\t{:<9.5f}'.format(
                        online.n,
                        lb, mid, ub,
                        sllb, slmid, slub,
                        online.mean(),
                        sincelastonline.mean(),
                        slot1lb, slot1mid, slot1ub,
                        slot1sllb, slot1slmid, slot1slub,
                        onlineslot1.mean(),
                        sincelastslot1.mean(),
                 ),
                 flush=True)
            sincelastonline, sincelastslot1 = (EasyAcc() for _ in range(2))
            sincelastdata = defaultdict(list)

    lb, mid, ub = 0, 0, 0
    sllb, slmid, slub = 0, 0, 0
    wmax = 1
    for slot in sorted(alldata.keys()):
        wmax *= wmaxperslot[slot]               
        allestimate = asymptoticconfidenceinterval(datagen = lambda: alldata[slot], wmin=0, wmax=wmax)
        allmle = estimate(datagen = lambda: alldata[slot], wmin=0, wmax=wmax)
        lb += -allestimate[0][1]
        mid += -allmle[0]
        ub += -allestimate[0][0]
        sincelastestimate = asymptoticconfidenceinterval(datagen = lambda: sincelastdata[slot], wmin=0, wmax=wmax)
        sincelastmle = estimate(datagen = lambda: sincelastdata[slot], wmin=0, wmax=wmax)
        sllb += -sincelastestimate[0][1]
        slmid += -sincelastmle[0]
        slub += -sincelastestimate[0][0] 
        if slot == 0:
            slot1lb, slot1mid, slot1ub = -allestimate[0][1], -allmle[0], -allestimate[0][0]
            slot1sllb, slot1slmid, slot1slub = -sincelastestimate[0][1], -sincelastmle[0], -sincelastestimate[0][0] 

    print('{:<6d}\t[{:.5f},{:.5f},{:.5f}]\t[{:.5f},{:.5f},{:.5f}]\t{:<9.5f}\t{:<9.5f}\t[{:.5f},{:.5f},{:.5f}]\t[{:.5f},{:.5f},{:.5f}]\t{:<9.5f}\t{:<9.5f}'.format(
                online.n,
                lb, mid, ub,
                sllb, slmid, slub,
                online.mean(),
                sincelastonline.mean(),
                slot1lb, slot1mid, slot1ub,
                slot1sllb, slot1slmid, slot1slub,
                onlineslot1.mean(),
                sincelastslot1.mean(),
         ),
         flush=True)

## Original Loop Parameters

In [19]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz', 
    'veedub': '--dsjson --ccb_explore_adf --epsilon 0 --cb_type mtr -l 1e-5 --power_t 0 --clip_p 0.2 -q cu -q cp -q cb -q du -q dp -q db -q eu -q ep -q eb --l1 1e-8 -b 20'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
2     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
4     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
8     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
16    	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	-0.00237 	-0.00475 	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
32    	[-4.68226,-0.03373,0.00000]	[-4.

## 5/7 Optimized Hyperparameters

In [5]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 -l 0.0001 --cb_type ips -q bc -q pc -q be -q uc -q bd'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
2     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
4     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
8     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
16    	[-4.99684,-2.46120,0.00000]	[-4.99620,-2.42240,0.00000]	-0.00237 	-0.00475 	[-0.99684,-0.46120,0.00000]	[-0.99620,-0.42240,0.00000]	0.00000  	0.00000  
32    	[-4.99810,-2.48060,0.00000]	[-5.

## 5/11 Coin Hyperparameters

In [5]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --coin --cb_type ips -q bc -q pc -q be -q uc -q bd'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
2     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
4     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
8     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
16    	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	-0.00237 	-0.00475 	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
32    	[-5.00000,-2.50000,0.00000]	[-5.

## 5/12 interactions0 Hyperparameters

In [3]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 -l 0.0001 --cb_type ips -q pe -q bc'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
2     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
4     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
8     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
16    	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	-0.00237 	-0.00475 	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
32    	[-5.00000,-2.50000,0.00000]	[-5.

## 5/12 interactions2 Hyperparameters

In [3]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/allslots-5-5-2020_5-11-2020_s0.002_p43807_n118776.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 -l 0.0001 --cb_type ips -q be -q pc -q ue -q bc'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
2     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
4     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
8     	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	0.00000  	0.00000  	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
16    	[-5.00000,-2.50000,0.00000]	[-5.00000,-2.50000,0.00000]	-0.00237 	-0.00475 	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	0.00000  	0.00000  
32    	[-5.00000,-2.50000,0.00000]	[-5.

## xbetdealshomespotlight 5/13 optimized hyperparameters (interactions0)

In [7]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/xbetdealshomespotlight/sample-0.004.allslots.dsjson.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 -l 0.0001 --cb_type ips -q bc'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-6.00000,-3.00000,0.00000]	[-6.00000,-3.00000,0.00000]	-0.01600 	-0.01600 	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	-0.00800 	-0.00800 
2     	[-6.00001,-2.50000,0.00000]	[-6.00000,-2.50000,0.00000]	-0.00800 	0.00000  	[-1.00001,-0.00000,0.00000]	[-1.00000,-0.00000,0.00000]	-0.00400 	0.00000  
4     	[-5.92083,-2.50400,-0.00001]	[-6.00001,-2.50800,0.00001]	-0.00600 	-0.00400 	[-0.92083,-0.00400,-0.00001]	[-1.00001,-0.00800,0.00001]	-0.00400 	-0.00400 
8     	[-5.81979,-2.69083,-0.00003]	[-5.97481,-2.68958,0.00000]	-0.00400 	-0.00200 	[-0.81979,-0.19083,-0.00003]	[-0.97481,-0.18958,0.00000]	-0.00200 	0.00000  
16    	[-5.42893,-2.26319,-0.00050]	[-5.32091,-1.83555,-0.00037]	-0.00400 	-0.00400 	[-0.68877,-0.19145,-0.00043]	[-0.82015,-0.19207,-0.00028]	-0.00200 	-0.00200 
32    	[-5.72107,-2.63159,-0.00

## xbetdealshomespotlight 5/13 optimized hyperparameters (interactions2) wrong learning rate

In [2]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/xbetdealshomespotlight/sample-0.004.allslots.dsjson.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 --cb_type ips -q pc -q pe'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-6.00000,-3.00000,0.00000]	[-6.00000,-3.00000,0.00000]	-0.01600 	-0.01600 	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	-0.00800 	-0.00800 
2     	[-6.00001,-2.50000,0.00000]	[-6.00000,-2.50000,0.00000]	-0.00800 	0.00000  	[-1.00001,-0.00000,0.00000]	[-1.00000,-0.00000,0.00000]	-0.00400 	0.00000  
4     	[-5.92083,-2.50400,-0.00001]	[-6.00001,-2.50800,0.00001]	-0.00600 	-0.00400 	[-0.92083,-0.00400,-0.00001]	[-1.00001,-0.00800,0.00001]	-0.00400 	-0.00400 
8     	[-5.81979,-2.69083,-0.00003]	[-5.97481,-2.68958,0.00000]	-0.00400 	-0.00200 	[-0.81979,-0.19083,-0.00003]	[-0.97481,-0.18958,0.00000]	-0.00200 	0.00000  
16    	[-5.42893,-2.26319,-0.00050]	[-5.32091,-1.83555,-0.00037]	-0.00400 	-0.00400 	[-0.68877,-0.19145,-0.00043]	[-0.82015,-0.19207,-0.00028]	-0.00200 	-0.00200 
32    	[-5.50302,-2.46590,-0.00

## xbetdealshomespotlight 5/13 optimized hyperparameters (interactions2)

In [2]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/xbetdealshomespotlight/sample-0.004.allslots.dsjson.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 -l 0.0001 --cb_type ips -q pc -q pe'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-6.00000,-3.00000,0.00000]	[-6.00000,-3.00000,0.00000]	-0.01600 	-0.01600 	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	-0.00800 	-0.00800 
2     	[-6.00001,-2.50000,0.00000]	[-6.00000,-2.50000,0.00000]	-0.00800 	0.00000  	[-1.00001,-0.00000,0.00000]	[-1.00000,-0.00000,0.00000]	-0.00400 	0.00000  
4     	[-5.92083,-2.50400,-0.00001]	[-6.00001,-2.50800,0.00001]	-0.00600 	-0.00400 	[-0.92083,-0.00400,-0.00001]	[-1.00001,-0.00800,0.00001]	-0.00400 	-0.00400 
8     	[-5.81979,-2.69083,-0.00003]	[-5.97481,-2.68958,0.00000]	-0.00400 	-0.00200 	[-0.81979,-0.19083,-0.00003]	[-0.97481,-0.18958,0.00000]	-0.00200 	0.00000  
16    	[-5.42893,-2.26319,-0.00050]	[-5.32091,-1.83555,-0.00037]	-0.00400 	-0.00400 	[-0.68877,-0.19145,-0.00043]	[-0.82015,-0.19207,-0.00028]	-0.00200 	-0.00200 
32    	[-5.72107,-2.63159,-0.00

## xbetdealshomespotlight 5/13 current loop parameters, wrong learning rate

In [2]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/xbetdealshomespotlight/sample-0.004.allslots.dsjson.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 --cb_type ips -q bc -q pc -q be -q uc -q bd'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-6.00000,-3.00000,0.00000]	[-6.00000,-3.00000,0.00000]	-0.01600 	-0.01600 	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	-0.00800 	-0.00800 
2     	[-6.00001,-2.50000,0.00000]	[-6.00000,-2.50000,0.00000]	-0.00800 	0.00000  	[-1.00001,-0.00000,0.00000]	[-1.00000,-0.00000,0.00000]	-0.00400 	0.00000  
4     	[-5.92083,-2.50400,-0.00001]	[-6.00001,-2.50800,0.00001]	-0.00600 	-0.00400 	[-0.92083,-0.00400,-0.00001]	[-1.00001,-0.00800,0.00001]	-0.00400 	-0.00400 
8     	[-5.80929,-2.47737,-0.00007]	[-5.97252,-2.26268,-0.00001]	-0.00400 	-0.00200 	[-0.81979,-0.19083,-0.00003]	[-0.97481,-0.18958,0.00000]	-0.00200 	0.00000  
16    	[-5.29969,-2.15646,-0.00069]	[-5.32091,-1.83555,-0.00037]	-0.00400 	-0.00400 	[-0.68877,-0.19145,-0.00043]	[-0.82015,-0.19207,-0.00028]	-0.00200 	-0.00200 
32    	[-4.87109,-1.62604,-0.0

## xbetdealshomespotlight 5/13 current loop parameters

In [2]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/xbetdealshomespotlight/sample-0.004.allslots.dsjson.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 -l 0.0001 --cb_type ips -q bc -q pc -q be -q uc -q bd'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-6.00000,-3.00000,0.00000]	[-6.00000,-3.00000,0.00000]	-0.01600 	-0.01600 	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	-0.00800 	-0.00800 
2     	[-6.00001,-2.50000,0.00000]	[-6.00000,-2.50000,0.00000]	-0.00800 	0.00000  	[-1.00001,-0.00000,0.00000]	[-1.00000,-0.00000,0.00000]	-0.00400 	0.00000  
4     	[-5.92083,-2.50400,-0.00001]	[-6.00001,-2.50800,0.00001]	-0.00600 	-0.00400 	[-0.92083,-0.00400,-0.00001]	[-1.00001,-0.00800,0.00001]	-0.00400 	-0.00400 
8     	[-5.81979,-2.69083,-0.00003]	[-5.97481,-2.68958,0.00000]	-0.00400 	-0.00200 	[-0.81979,-0.19083,-0.00003]	[-0.97481,-0.18958,0.00000]	-0.00200 	0.00000  
16    	[-5.42893,-2.26319,-0.00050]	[-5.32091,-1.83555,-0.00037]	-0.00400 	-0.00400 	[-0.68877,-0.19145,-0.00043]	[-0.82015,-0.19207,-0.00028]	-0.00200 	-0.00200 
32    	[-4.71220,-1.53311,-0.00

## xbetdealshomespotlight, but hyperparameters from other loop

In [10]:
forkit(ccb_eval_allslots_mle, [], {
    'data': '/mnt/c/Users/pmineiro/Downloads/xbox/xbetdealshomespotlight/sample-0.004.allslots.dsjson.gz', 
    'veedub': '--dsjson --ccb_explore_adf -b 20 --epsilon 0 --power_t 0 -l 0.0001 --cb_type ips -q be -q pc -q ue -q bc'
})

n     	all slot cost               	since last                  	online   	sincelast	slot1 cost                  	since last                  	slot1 online	sincelast
1     	[-6.00000,-3.00000,0.00000]	[-6.00000,-3.00000,0.00000]	-0.01600 	-0.01600 	[-1.00000,-0.50000,0.00000]	[-1.00000,-0.50000,0.00000]	-0.00800 	-0.00800 
2     	[-6.00001,-2.50000,0.00000]	[-6.00000,-2.50000,0.00000]	-0.00800 	0.00000  	[-1.00001,-0.00000,0.00000]	[-1.00000,-0.00000,0.00000]	-0.00400 	0.00000  
4     	[-5.92083,-2.50400,-0.00001]	[-6.00001,-2.50800,0.00001]	-0.00600 	-0.00400 	[-0.92083,-0.00400,-0.00001]	[-1.00001,-0.00800,0.00001]	-0.00400 	-0.00400 
8     	[-5.81979,-2.69083,-0.00003]	[-5.97481,-2.68958,0.00000]	-0.00400 	-0.00200 	[-0.81979,-0.19083,-0.00003]	[-0.97481,-0.18958,0.00000]	-0.00200 	0.00000  
16    	[-5.42893,-2.26319,-0.00050]	[-5.32091,-1.83555,-0.00037]	-0.00400 	-0.00400 	[-0.68877,-0.19145,-0.00043]	[-0.82015,-0.19207,-0.00028]	-0.00200 	-0.00200 
32    	[-5.66290,-2.59310,-0.00