# Thesis Appendix A.1 - Ranking Under Lower Uncertainty: Probability in Gen-erating Specific Order Statistic Pairs

This notebook generates the plots featured in the said appendix. See the caption of each plot for more detail.

In [1]:
import numpy as np
import os
from scipy.special import rel_entr
from scipy.stats import rankdata, betabinom
import sys

sys.path.append(os.path.dirname(os.getcwd()))

from rulu.normal_normal_model import (
    _E_F_Zn_ZIr_taylor_second_order, _var_F_Zn_ZIr_taylor_second_order, 
    _E_F_Zn_ZIr_taylor_fourth_order, _var_F_Zn_ZIr_taylor_fourth_order,
    _E_F_Zn_ZIr_taylor_sixth_order, _var_F_Zn_ZIr_taylor_sixth_order,
    _E_F_Zn_ZIr_owen_integrals, _var_F_Zn_ZIr_owen_integrals)
from rulu.utils import fit_beta_distribution_params

------

Some parameter settings used in the appendix:

Small $N$ example 1:
* $N=25$, 
* $\mu_\mathcal{V} = \mu_\epsilon = 0$
* $\sigma^2_\mathcal{V} = 1$
* $\sigma^2_1 = 0.5$
* $\sigma^2_2 = 0.4$
* Number of runs: 500 000

Small $N$ example 2:
* $N=10$
* $\mu_\mathcal{V} = \mu_\epsilon = 0$
* $\sigma^2_\mathcal{V} = 1$
* $\sigma^2_1 = 0.5$
* $\sigma^2_2 = 0.4$
* Number of runs: 500 000

Large $N$ example (taken from DSE paper Section 7.2 / Thesis Section 2.7.2: Case study - Marketing experiments - Scenario 1 (Reducing noise from 5% to 0.8%)):
* $N=184$
* $\mu_\mathcal{V} = 19.9$
* $\mu_\epsilon = 0$
* $\sigma^2_\mathcal{V} = 10^2$
* $\sigma^2_1 = 5^2$
* $\sigma^2_2 = 0.8^2$
* Number of runs: 2 000 000

In [2]:
# Ranking under lower uncertainty (normal) model parameters

N = 25
mu_V = 0
mu_epsilon = 0
sigma_sq_V = 1
sigma_sq_1 = 0.5
sigma_sq_2 = 0.4

sigma_V = np.sqrt(sigma_sq_V)
sigma_1 = np.sqrt(sigma_sq_1)
sigma_2 = np.sqrt(sigma_sq_2)

n_runs = 500000

In [3]:
rank_pairs = np.zeros((N, N))

for run in range(0, n_runs):
    V = np.random.default_rng().normal(mu_V, sigma_V, N)
    H = V + np.random.default_rng().normal(mu_epsilon, sigma_1, N)
    L = V + np.random.default_rng().normal(mu_epsilon, sigma_2, N)
    
    ranks_H = rankdata(H)
    ranks_L = rankdata(L)
    
    for i in range(0, N):
        r = ranks_H[i]
        s = ranks_L[i]
        rank_pairs[int(r - 1), int(s - 1)] += 1

We obtain the KL divergence of each candidate beta-binomial distribtuion. This is defined as the mean KL divergence across each marginal distribution (fixing $r$):

$$ \begin{align}
    & \, \frac{1}{N} \sum_{r=1}^{N} D_{KL}\left(\hat{f}_{\mathcal{C} \,|\, N-1, r, ...} \,||\, f_{\hat{\mathcal{C}} \,|\, N-1, r, ...}\right) \nonumber\\
    = &\, \frac{1}{N} \sum_{r=1}^{N} \sum_{s=1}^{N} \hat{f}_{\mathcal{C}}(s-1 \,|\, N-1, r, ...) \log\left(\frac{\hat{f}_{\mathcal{C}}(s-1 \,|\, N-1, r, ...)}{f_{\hat{\mathcal{C}}}(s-1 \,|\, N-1, r, ...)}\right)
    \,,
\end{align} $$

In [4]:
uniform_kl = np.empty(N)
taylor_second_order_kl = np.empty(N)
taylor_fourth_order_kl = np.empty(N)
taylor_sixth_order_kl = np.empty(N)
owen_integrals_kl = np.empty(N)

for r in range(0, N):

    empirical_dist = rank_pairs[r, :] / n_runs
    uniform_dist = np.ones(N) / N

    # Taylor series expansion (2nd order terms)
    alpha_taylor_second_order, beta_taylor_second_order = (
        fit_beta_distribution_params(
            _E_F_Zn_ZIr_taylor_second_order(
                r=r+1, N=N, mu_X=mu_V, mu_eps=mu_epsilon, 
                sigma_sq_X=sigma_sq_V, sigma_sq_1=sigma_sq_1, sigma_sq_2=sigma_sq_2),
            _var_F_Zn_ZIr_taylor_second_order(
                r=r+1, N=N, mu_X=mu_V, mu_eps=mu_epsilon,
                sigma_sq_X=sigma_sq_V, sigma_sq_1=sigma_sq_1, sigma_sq_2=sigma_sq_2)
        )
    )
    taylor_second_order_dist = (
        betabinom.pmf(np.arange(0, N, 1), N-1, alpha_taylor_second_order, beta_taylor_second_order)
    )

    # Taylor series expansion (4th order terms)
    alpha_taylor_fourth_order, beta_taylor_fourth_order = (
        fit_beta_distribution_params(
            _E_F_Zn_ZIr_taylor_fourth_order(
                r=r+1, N=N, mu_X=mu_V, mu_eps=mu_epsilon,
                sigma_sq_X=sigma_sq_V, sigma_sq_1=sigma_sq_1, sigma_sq_2=sigma_sq_2),
            _var_F_Zn_ZIr_taylor_fourth_order(
                r=r+1, N=N, mu_X=mu_V, mu_eps=mu_epsilon,
                sigma_sq_X=sigma_sq_V, sigma_sq_1=sigma_sq_1, sigma_sq_2=sigma_sq_2)
        )
    )
    taylor_fourth_order_dist = (
        betabinom.pmf(np.arange(0, N, 1), N-1, alpha_taylor_fourth_order, beta_taylor_fourth_order)
    )

    # Taylor series expansion (6th order terms)
    alpha_taylor_sixth_order, beta_taylor_sixth_order = (
        fit_beta_distribution_params(
            _E_F_Zn_ZIr_taylor_second_order(
                r=r+1, N=N, mu_X=mu_V, mu_eps=mu_epsilon,
                sigma_sq_X=sigma_sq_V, sigma_sq_1=sigma_sq_1, sigma_sq_2=sigma_sq_2),
            _var_F_Zn_ZIr_taylor_sixth_order(
                r=r+1, N=N, mu_X=mu_V, mu_eps=mu_epsilon,
                sigma_sq_X=sigma_sq_V, sigma_sq_1=sigma_sq_1, sigma_sq_2=sigma_sq_2)
        )
    )
    taylor_sixth_order_dist = (
        betabinom.pmf(np.arange(0, N, 1), N-1, alpha_taylor_sixth_order, beta_taylor_sixth_order)
    )

    # Using Owen's integrals of Gaussian functions
    alpha_owen_integrals, beta_owen_integrals = (
        fit_beta_distribution_params(
            _E_F_Zn_ZIr_owen_integrals(
                r=r+1, sigma_sq_X=sigma_sq_V, sigma_sq_1=sigma_sq_1, sigma_sq_2=sigma_sq_2, N=N),
            _var_F_Zn_ZIr_owen_integrals(
                r=r+1, sigma_sq_X=sigma_sq_V, sigma_sq_1=sigma_sq_1, sigma_sq_2=sigma_sq_2, N=N)
        )
    )
    owen_integrals_dist = betabinom.pmf(np.arange(0, N, 1), N-1, alpha_owen_integrals, beta_owen_integrals)

    uniform_kl[r] = np.sum(rel_entr(empirical_dist, uniform_dist))
    taylor_second_order_kl[r] = np.sum(rel_entr(empirical_dist, taylor_second_order_dist))
    taylor_fourth_order_kl[r] = np.sum(rel_entr(empirical_dist, taylor_fourth_order_dist))
    taylor_sixth_order_kl[r] = np.sum(rel_entr(empirical_dist, taylor_sixth_order_dist))
    owen_integrals_kl[r] = np.sum(rel_entr(empirical_dist, owen_integrals_dist))

In [5]:
print("D_KL(empirical || uniform): \t\t", np.mean(uniform_kl))
print("D_KL(empirical || Taylor 2nd order): \t", np.mean(taylor_second_order_kl))
print("D_KL(empirical || Taylor 4th order): \t", np.mean(taylor_fourth_order_kl))
print("D_KL(empirical || Taylor 6th order): \t", np.mean(taylor_sixth_order_kl))
print("D_KL(empirical || Owen integrals): \t", np.mean(owen_integrals_kl))

D_KL(empirical || uniform): 		 0.2840618552024391
D_KL(empirical || Taylor 2nd order): 	 0.06212108952884576
D_KL(empirical || Taylor 4th order): 	 0.019209842101433172
D_KL(empirical || Taylor 6th order): 	 0.02045389840688097
D_KL(empirical || Owen integrals): 	 0.003224913984511785


---------

We then analyse the mean and standard deviation of mean KL divergence for different modelled distribution across multiple simulations.

In [6]:
# N = 10
mean_uniform_kl = [0.24583566160632592, 0.24573391448486442, 0.24599338741540477, 0.24583141872615566, 0.24573698092468388, 0.2461226930579707, 0.24568685487412384, 0.2452553462449177, 0.24614485425954064, ]
mean_taylor_second_order_kl = [0.059291643965017835, 0.059323479985341744, 0.05937075092873617, 0.059233841590590884, 0.05931187222558796, 0.05930655335697639, 0.05904111087245195, 0.05887238547891825,0.05942999030790099, ]
mean_taylor_fourth_order_kl = [0.010664858693916868, 0.010682561675408498, 0.010692001248505209, 0.010678947330902949, 0.010640062946571598, 0.010729869094094397, 0.010767615512869727, 0.010717219649995103, 0.010686069529627779, ]
mean_taylor_sixth_order_kl = [0.03421506704907094, 0.03421000985207649, 0.034254182584104245, 0.03414188991979951, 0.034211948046215065, 0.034164704319572914, 0.033975217400172474, 0.03385254034355525, 0.03427938328733251, ]
mean_owen_integrals_kl = [0.007861709424365293, 0.007870244236657534, 0.0079003953210982, 0.0078323168448525, 0.007848710117390936, 0.007871463868994576, 0.007788141364500197, 0.0076875895515729625, 0.007910172720101786, ]

In [7]:
# N = 25
mean_uniform_kl = [0.28331888615319534, 0.2835997121662023, 0.28392553876468357, 0.28342456987658543, 0.2835646258026046,0.2836361558102289, 0.2836627390340373, 0.2833345481403758, 0.2837740716580439, 0.2835611618518951, ]
mean_taylor_second_order_kl = [0.06188888789398659, 0.06190172269512084, 0.06192145905218545, 0.061817250292931725, 0.06189671981628173,0.06204591250471147, 0.06183097908113955, 0.06181554325556002, 0.062012772669191876, 0.0618982049154862, ]
mean_taylor_fourth_order_kl = [0.019267647581680952, 0.01931742706417001, 0.01935302515788559, 0.019332788200761036, 0.019284580077475848, 0.019248324685128656, 0.01931810237883875, 0.019305387505103043, 0.019283358940713927, 0.01936716811311133, ]
mean_taylor_sixth_order_kl = [0.02028611802909269, 0.020276351607122635, 0.02028212925804966, 0.020238674248410842, 0.020300192290752538,0.020364637636959646, 0.02027452510104718, 0.020252509615101527, 0.020350160658980997, 0.02023062053376484, ]
mean_owen_integrals_kl = [0.0031395914253396794, 0.0031721123537456876, 0.003187326781318354, 0.0031487269117780737, 0.0031625853458602487, 0.0031928682458010078, 0.0031566320360804897, 0.0031381724966117047, 0.0032008125367990635, 0.00316759553731204, ]

In [8]:
# N = 184
mean_uniform_kl = [0.7702756037247123, 0.7701374758087426, 0.7702391288834806, 0.7701424819303399, 0.7701908656569352, 0.7700789980628855, 0.7701005320583016, 0.7700789980628855, 0.7701005320583016, ]
mean_taylor_second_order_kl = [0.025539126613817697, 0.025557626740464565, 0.02554198472973204, 0.025557746478644144, 0.025562797230028043, 0.025573068003629133, 0.025539317405245728, 0.025573068003629133, 0.025539317405245728, ]
mean_taylor_fourth_order_kl = [0.0075048259137461895, 0.007491744908045821, 0.007504212440829777, 0.007491639760194807, 0.00749783104146689, 0.007486244334450134, 0.007487266470390229, 0.007486244334450134, 0.007487266470390229, ]
mean_taylor_sixth_order_kl = [0.005826607354630456, 0.005812877388863058, 0.005827338241837454, 0.005815286501015679, 0.005820463093529793, 0.005815299230034815, 0.005809521075114366, 0.005815299230034815, 0.005809521075114366, ]
mean_owen_integrals_kl = [0.005809543573652463, 0.0057953601645784145, 0.0058105617744172505, 0.00579721444248038, 0.005803009832051783, 0.005795806685300347, 0.005791793891654175, 0.005795806685300347, 0.005791793891654175, ]

In [9]:
for (distribution_name, mean_kl) in [
    ("Uniform", mean_uniform_kl),
    ("Beta-binomial from Taylor SE (2nd order)", mean_taylor_second_order_kl),
    ("Beta-binomial from Taylor SE (4th order)", mean_taylor_fourth_order_kl),
    ("Beta-binomial from Taylor SE (6th order)", mean_taylor_sixth_order_kl),
    ("Beta-binomial from Owen's integrals", mean_owen_integrals_kl),
]:
    mean_mean_kl = np.mean(mean_kl)
    stddev_mean_kl = np.std(mean_kl)
    print(f"D_KL(Empirical || {distribution_name}):\t {mean_mean_kl} +/- {stddev_mean_kl}")


D_KL(Empirical || Uniform):	 0.7701494018051761 +/- 6.714587462568992e-05
D_KL(Empirical || Beta-binomial from Taylor SE (2nd order)):	 0.0255537836233818 +/- 1.3449817320544477e-05
D_KL(Empirical || Beta-binomial from Taylor SE (4th order)):	 0.0074930306304404675 +/- 7.065973619239086e-06
D_KL(Empirical || Beta-binomial from Taylor SE (6th order)):	 0.00581691257668609 +/- 6.224419756566008e-06
D_KL(Empirical || Beta-binomial from Owen's integrals):	 0.00579898788234326 +/- 6.680037713325263e-06


----------
Raw output from the first section of code:

In [10]:
# N = 10
# D_KL(empirical || uniform): 		 0.24583566160632592
# D_KL(empirical || Taylor 2nd order): 	 0.059291643965017835
# D_KL(empirical || Taylor 4th order): 	 0.010664858693916868
# D_KL(empirical || Taylor 6th order): 	 0.03421506704907094
# D_KL(empirical || Owen integrals): 	 0.007861709424365293
# ----------
# D_KL(empirical || uniform): 		 0.24573391448486442
# D_KL(empirical || Taylor 2nd order): 	 0.059323479985341744
# D_KL(empirical || Taylor 4th order): 	 0.010682561675408498
# D_KL(empirical || Taylor 6th order): 	 0.03421000985207649
# D_KL(empirical || Owen integrals): 	 0.007870244236657534
# ----------
# D_KL(empirical || uniform): 		 0.24599338741540477
# D_KL(empirical || Taylor 2nd order): 	 0.05937075092873617
# D_KL(empirical || Taylor 4th order): 	 0.010692001248505209
# D_KL(empirical || Taylor 6th order): 	 0.034254182584104245
# D_KL(empirical || Owen integrals): 	 0.0079003953210982
# ----------
# D_KL(empirical || uniform): 		 0.24583141872615566
# D_KL(empirical || Taylor 2nd order): 	 0.059233841590590884
# D_KL(empirical || Taylor 4th order): 	 0.010678947330902949
# D_KL(empirical || Taylor 6th order): 	 0.03414188991979951
# D_KL(empirical || Owen integrals): 	 0.0078323168448525
# ----------
# D_KL(empirical || uniform): 		 0.24573698092468388
# D_KL(empirical || Taylor 2nd order): 	 0.05931187222558796
# D_KL(empirical || Taylor 4th order): 	 0.010640062946571598
# D_KL(empirical || Taylor 6th order): 	 0.034211948046215065
# D_KL(empirical || Owen integrals): 	 0.007848710117390936
# ----------
# D_KL(empirical || uniform): 		 0.2461226930579707
# D_KL(empirical || Taylor 2nd order): 	 0.05930655335697639
# D_KL(empirical || Taylor 4th order): 	 0.010729869094094397
# D_KL(empirical || Taylor 6th order): 	 0.034164704319572914
# D_KL(empirical || Owen integrals): 	 0.007871463868994576
# ----------
# D_KL(empirical || uniform): 		 0.24568685487412384
# D_KL(empirical || Taylor 2nd order): 	 0.05904111087245195
# D_KL(empirical || Taylor 4th order): 	 0.010767615512869727
# D_KL(empirical || Taylor 6th order): 	 0.033975217400172474
# D_KL(empirical || Owen integrals): 	 0.007788141364500197
# ----------
# D_KL(empirical || uniform): 		 0.2452553462449177
# D_KL(empirical || Taylor 2nd order): 	 0.05887238547891825
# D_KL(empirical || Taylor 4th order): 	 0.010717219649995103
# D_KL(empirical || Taylor 6th order): 	 0.03385254034355525
# D_KL(empirical || Owen integrals): 	 0.0076875895515729625
# ----------
# D_KL(empirical || uniform): 		 0.24614485425954064
# D_KL(empirical || Taylor 2nd order): 	 0.05942999030790099
# D_KL(empirical || Taylor 4th order): 	 0.010686069529627779
# D_KL(empirical || Taylor 6th order): 	 0.03427938328733251
# D_KL(empirical || Owen integrals): 	 0.007910172720101786
# ----------

In [11]:
# N = 25
# D_KL(empirical || uniform): 		 0.28331888615319534
# D_KL(empirical || Taylor 2nd order): 	 0.06188888789398659
# D_KL(empirical || Taylor 4th order): 	 0.019267647581680952
# D_KL(empirical || Taylor 6th order): 	 0.02028611802909269
# D_KL(empirical || Owen integrals): 	 0.0031395914253396794
# ----------
# D_KL(empirical || uniform): 		 0.2835997121662023
# D_KL(empirical || Taylor 2nd order): 	 0.06190172269512084
# D_KL(empirical || Taylor 4th order): 	 0.01931742706417001
# D_KL(empirical || Taylor 6th order): 	 0.020276351607122635
# D_KL(empirical || Owen integrals): 	 0.0031721123537456876
# ----------
# D_KL(empirical || uniform): 		 0.28392553876468357
# D_KL(empirical || Taylor 2nd order): 	 0.06192145905218545
# D_KL(empirical || Taylor 4th order): 	 0.01935302515788559
# D_KL(empirical || Taylor 6th order): 	 0.02028212925804966
# D_KL(empirical || Owen integrals): 	 0.003187326781318354
# ----------
# D_KL(empirical || uniform): 		 0.28342456987658543
# D_KL(empirical || Taylor 2nd order): 	 0.061817250292931725
# D_KL(empirical || Taylor 4th order): 	 0.019332788200761036
# D_KL(empirical || Taylor 6th order): 	 0.020238674248410842
# D_KL(empirical || Owen integrals): 	 0.0031487269117780737
# ----------
# D_KL(empirical || uniform): 		 0.2835646258026046
# D_KL(empirical || Taylor 2nd order): 	 0.06189671981628173
# D_KL(empirical || Taylor 4th order): 	 0.019284580077475848
# D_KL(empirical || Taylor 6th order): 	 0.020300192290752538
# D_KL(empirical || Owen integrals): 	 0.0031625853458602487
# ----------
# D_KL(empirical || uniform): 		 0.2836361558102289
# D_KL(empirical || Taylor 2nd order): 	 0.06204591250471147
# D_KL(empirical || Taylor 4th order): 	 0.019248324685128656
# D_KL(empirical || Taylor 6th order): 	 0.020364637636959646
# D_KL(empirical || Owen integrals): 	 0.0031928682458010078
# ----------
# D_KL(empirical || uniform): 		 0.2836627390340373
# D_KL(empirical || Taylor 2nd order): 	 0.06183097908113955
# D_KL(empirical || Taylor 4th order): 	 0.01931810237883875
# D_KL(empirical || Taylor 6th order): 	 0.02027452510104718
# D_KL(empirical || Owen integrals): 	 0.0031566320360804897
# ----------
# D_KL(empirical || uniform): 		 0.2833345481403758
# D_KL(empirical || Taylor 2nd order): 	 0.06181554325556002
# D_KL(empirical || Taylor 4th order): 	 0.019305387505103043
# D_KL(empirical || Taylor 6th order): 	 0.020252509615101527
# D_KL(empirical || Owen integrals): 	 0.0031381724966117047
# ----------
# D_KL(empirical || uniform): 		 0.2837740716580439
# D_KL(empirical || Taylor 2nd order): 	 0.062012772669191876
# D_KL(empirical || Taylor 4th order): 	 0.019283358940713927
# D_KL(empirical || Taylor 6th order): 	 0.020350160658980997
# D_KL(empirical || Owen integrals): 	 0.0032008125367990635
# ----------
# D_KL(empirical || uniform): 		 0.2835611618518951
# D_KL(empirical || Taylor 2nd order): 	 0.0618982049154862
# D_KL(empirical || Taylor 4th order): 	 0.01936716811311133
# D_KL(empirical || Taylor 6th order): 	 0.02023062053376484
# D_KL(empirical || Owen integrals): 	 0.00316759553731204
# ----------

In [12]:
# N = 184
# D_KL(empirical || uniform): 		 0.7702756037247123
# D_KL(empirical || Taylor 2nd order): 	 0.025539126613817697
# D_KL(empirical || Taylor 4th order): 	 0.0075048259137461895
# D_KL(empirical || Taylor 6th order): 	 0.005826607354630456
# D_KL(empirical || Owen integrals): 	 0.005809543573652463
# ----------
# D_KL(empirical || uniform): 		 0.7701374758087426
# D_KL(empirical || Taylor 2nd order): 	 0.025557626740464565
# D_KL(empirical || Taylor 4th order): 	 0.007491744908045821
# D_KL(empirical || Taylor 6th order): 	 0.005812877388863058
# D_KL(empirical || Owen integrals): 	 0.0057953601645784145
# ----------
# D_KL(empirical || uniform): 		 0.7702391288834806
# D_KL(empirical || Taylor 2nd order): 	 0.02554198472973204
# D_KL(empirical || Taylor 4th order): 	 0.007504212440829777
# D_KL(empirical || Taylor 6th order): 	 0.005827338241837454
# D_KL(empirical || Owen integrals): 	 0.0058105617744172505
# ----------
# D_KL(empirical || uniform): 		 0.7701424819303399
# D_KL(empirical || Taylor 2nd order): 	 0.025557746478644144
# D_KL(empirical || Taylor 4th order): 	 0.007491639760194807
# D_KL(empirical || Taylor 6th order): 	 0.005815286501015679
# D_KL(empirical || Owen integrals): 	 0.00579721444248038
# ----------
# D_KL(empirical || uniform): 		 0.7701908656569352
# D_KL(empirical || Taylor 2nd order): 	 0.025562797230028043
# D_KL(empirical || Taylor 4th order): 	 0.00749783104146689
# D_KL(empirical || Taylor 6th order): 	 0.005820463093529793
# D_KL(empirical || Owen integrals): 	 0.005803009832051783
# ----------
# D_KL(empirical || uniform): 		 0.7700789980628855
# D_KL(empirical || Taylor 2nd order): 	 0.025573068003629133
# D_KL(empirical || Taylor 4th order): 	 0.007486244334450134
# D_KL(empirical || Taylor 6th order): 	 0.005815299230034815
# D_KL(empirical || Owen integrals): 	 0.005795806685300347
# ----------
# D_KL(empirical || uniform): 		 0.7701005320583016
# D_KL(empirical || Taylor 2nd order): 	 0.025539317405245728
# D_KL(empirical || Taylor 4th order): 	 0.007487266470390229
# D_KL(empirical || Taylor 6th order): 	 0.005809521075114366
# D_KL(empirical || Owen integrals): 	 0.005791793891654175
# ----------