In [11]:
from __future__ import division

import numpy as np
from scipy import stats

In [12]:
n = 10  # number of Bernoulli trials behind every simulated sample

# Draw from a dedicated, seeded generator so that Restart & Run All reproduces
# the same success counts (and therefore the same ranking) on every run.
SEED = 42
rng = np.random.RandomState(SEED)

# One binomial draw per sample; the success probability rises from s1 to s4.
s1 = rng.binomial(n, p=0.2, size=1)[0]
s2 = rng.binomial(n, p=0.4, size=1)[0]
s3 = rng.binomial(n, p=0.6, size=1)[0]
s4 = rng.binomial(n, p=0.8, size=1)[0]

# Map each sample name to [success_count, total_count].
samples = {'s1': [s1, n], 's2': [s2, n], 's3': [s3, n], 's4': [s4, n]}

#### We demonstrate ranking by the lower bound of the Wilson score confidence interval.
<br>
$$
\bigg[\frac{1}{1+\frac{1}{n}z^2_{1-\alpha/2}}\bigg]\bigg\{\hat{p}+ \frac{1}{2n}z^2_{1-\alpha/2} \pm z_{1-\alpha/2}\sqrt{\frac{\hat{p}(1-\hat{p})}{n}+\frac{z^2_{1-\alpha/2}}{4n^2}}\bigg\}
$$
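#### In sorting by this lower bound, we rank each sample by a conservative estimate of its true parameter: it is the lower end of a two-sided 95% interval (for $\alpha = 0.05$), so we are roughly 97.5% confident that the true parameter is at least this lower bound.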

In [1]:
def wilson_score_interval(success_count, total_count, alpha):
    """Estimates a 100(1-alpha)% Wilson score confidence interval for the parameter.

    The interval is

        (p_hat + z^2/(2n) +/- z * sqrt(p_hat*(1-p_hat)/n + z^2/(4n^2))) / (1 + z^2/n)

    where p_hat = success_count / total_count, n = total_count and z is the
    upper alpha/2 critical value of the standard normal distribution.

    Parameters
    ----------
    success_count : number of observed successes.
    total_count : number of trials; must be positive.
    alpha : significance level, e.g. 0.05 for a 95% interval.

    Returns
    -------
    (lower_bound, upper_bound) : tuple of floats.

    Raises
    ------
    ValueError
        If total_count is not positive (the proportion is undefined).
    """
    if np.any(np.asarray(total_count) <= 0):
        raise ValueError("total_count must be positive")

    cv_of_z = stats.norm.isf(alpha / 2)
    z_sq_over_n = cv_of_z**2 / total_count
    p_hat = success_count / total_count

    # The 1/(1 + z^2/n) factor scales BOTH the centre and the half-width.
    # Applying it to the square-root term only leaves the centre unshrunk
    # and can push the upper bound above 1.
    shrinkage = 1 + z_sq_over_n
    center = (p_hat + z_sq_over_n / 2) / shrinkage
    half_width = cv_of_z * np.sqrt(
        (p_hat * (1 - p_hat) + z_sq_over_n / 4) / total_count) / shrinkage

    return center - half_width, center + half_width

In [14]:
def confidence_sort(samples, alpha):
    """Sorts by the lower bound of the wilson score confidence interval.

    Parameters
    ----------
    samples : dict mapping a sample name to a sequence whose first two items
        are (success_count, total_count). The dict is not modified.
    alpha : significance level passed to wilson_score_interval.

    Returns
    -------
    list of the keys of `samples`, ordered by ascending lower bound (the
    sample with the smallest lower bound comes first).
    """
    # Collect the bounds in a separate dict. Writing them back into `samples`
    # would destroy the caller's counts and make a second call fail.
    lower_bounds = {}
    for name, counts in samples.items():
        lower, _upper = wilson_score_interval(counts[0], counts[1], alpha)
        lower_bounds[name] = lower
    return sorted(lower_bounds, key=lower_bounds.__getitem__)

In [15]:
# Rank the four samples by the lower bound of their Wilson score interval
# at alpha = 0.05; names are returned in ascending order of that bound.
confidence_sort(samples, 0.05)

['s1', 's3', 's2', 's4']