### Imports

In [None]:
import os
import math

import numpy as np
import scipy.special as spec
import itertools as itt

import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
sns.set()

In [None]:
# import mixtureofconcave as subm
# import determinantal as logsubm
# import plottingtools

### KL divergence

In [None]:
def kld_part_uniform(n, k, groupcounts, groupbudgets):
    r""" Draft of D_{KL}(\rho^{\textrm{part}}||\rho^{\textrm{card}}) -- currently disabled.

        Value sketched by the original notes:
            log( |{S : |S| = k}| / |{S : |S ∩ Vi| ≥ ki ∀ i ∈ [p]; |S| = k}| )
        Open question kept from those notes: "... but this is independent
        of groupcounts!"

        NOTE(review): the set written above uses "≥ ki", while the product
        computed below counts selections with exactly ki elements per group
        -- confirm which one is intended before enabling the function.

        n            : ground-set size; must equal sum(groupcounts)
        k            : total selection size; must be >= sum(groupbudgets)
        groupcounts  : per-group sizes, indexable by group
        groupbudgets : per-group budgets; must support elementwise `<=`
                       against groupcounts (the notebook passes numpy arrays)

        Raises AssertionError if an input check fails; otherwise always
        raises NotImplementedError, so nothing is ever returned.
    """

    assert np.sum(groupcounts) == n, "group assignment not adding up"
    assert np.sum(groupbudgets) <= k, "group budgets exceeding total"
    assert (groupbudgets <= groupcounts).all(), "not enough members"

    # Candidate normaliser: product over groups of binom(|Vi|, ki).
    # Flagged in the original notes as "not correct!?".
    Zpart = 1
    for gidx, budget in enumerate(groupbudgets):
        Zpart *= spec.binom(groupcounts[gidx], budget)

    # Deliberate stop: the expression after it is an unfinished draft and
    # is unreachable for as long as this raise stays in place.
    raise NotImplementedError
    return np.log(spec.binom(n, k) / Zpart)

In [None]:
def kld_quota_uniform(n, k, groupcounts, groupbudgets):
    r""" Draft of D_{KL}(\rho^{\textrm{quota}}||\rho^{\textrm{card}}) -- currently disabled.

        Enumerates every way of handing the k - sum(groupbudgets) "free"
        places to the p groups and accumulates, per split, the product of
        binom(groupcounts[i], groupbudgets[i] + extra_i) into Zquota.

        n            : ground-set size; must equal sum(groupcounts)
        k            : total selection size; must be >= sum(groupbudgets)
        groupcounts  : per-group sizes, indexable by group
        groupbudgets : per-group minimum budgets; needs `.copy()` and
                       elementwise `<=` (the notebook passes numpy arrays)

        Raises AssertionError if an input check fails; otherwise always
        raises NotImplementedError, so nothing is ever returned.
        With a single group the enumeration indexes an empty tuple and
        raises IndexError before that point is reached.
    """

    assert np.sum(groupcounts) == n, "group assignment not adding up"
    assert np.sum(groupbudgets) <= k, "group budgets exceeding total"
    assert (groupbudgets <= groupcounts).all(), "not enough members"

    p = len(groupbudgets)
    ktilda = k - np.sum(groupbudgets)  # places left after the budgets are met
    n_slots = ktilda + p - 1           # stars-and-bars: ktilda stars, p-1 bars

    Zquota = 0
    for bars in itt.combinations(np.arange(n_slots), p - 1):
        kspl = groupbudgets.copy()

        # stars before the first bar go to the first group ...
        kspl[0] += bars[0]
        # ... stars between consecutive bars go to the middle groups ...
        for gidx, (left, right) in enumerate(zip(bars, bars[1:]), start=1):
            kspl[gidx] += right - left - 1
        # ... and stars after the last bar go to the last group.
        kspl[-1] += n_slots - bars[-1] - 1

        assert np.sum(kspl) == k, "split assignment not adding up"

        Zspl = 1
        for gidx, take in enumerate(kspl):
            Zspl *= spec.binom(groupcounts[gidx], take)

        Zquota += Zspl

    # Deliberate stop: the expression after it is unreachable for as long
    # as this raise stays in place.
    raise NotImplementedError
    return np.log(spec.binom(n, k) / Zquota)

### Plot as a function of n

In [None]:
# Selection setup shared by the experiments below.
k = 15                                    # total selection capacity
groupbudgets = np.array([2, 3, 1, 2, 4])  # per-group budgets; k - groupbudgets.sum() = 3 "free" places
p = groupbudgets.shape[0]                 # number of groups

In [None]:
# ground-set sizes |V| to evaluate: 20, then 50 to 500 in steps of 50
nn = np.array([20] + list(range(50, 501, 50)))

In [None]:
# Partition-constraint value for every ground-set size in nn.
np.random.seed(0)
kldp = np.zeros(nn.shape, dtype=float)
for nidx, n_total in enumerate(nn):
    # for a given |V| = n_total, start from p empty groups ...
    groupcounts = np.zeros(p, dtype=int)
    for elem in range(n_total):
        # ... and assign each element to a random group.
        # note: we only care about the final counts,
        # so we don't keep track of fungible elements
        groupcounts[np.random.choice(p)] += 1

    # Compute D_{KL}(\rho^{\textrm{part}}||\rho^{\textrm{card}})
    # ... but that's a distribution over all possible groupcounts,
    # not an expression evaluated at a specific groupcount!
    kldp[nidx] = kld_part_uniform(n_total, k, groupcounts, groupbudgets)

In [None]:
# Quota-constraint value for every ground-set size in nn.
# Re-seeding with the same value before the draws keeps the random sequence
# identical from run to run.
np.random.seed(0)
kldq = np.zeros(nn.shape, dtype=float)
for nidx, n_total in enumerate(nn):
    # drop each of the n_total elements into a uniformly random group,
    # keeping only the per-group counts
    groupcounts = np.zeros(p, dtype=int)
    for elem in range(n_total):
        groupcounts[np.random.choice(p)] += 1

    kldq[nidx] = kld_quota_uniform(n_total, k, groupcounts, groupbudgets)

In [None]:
# Show both result arrays: (partition-constraint values, quota-constraint values).
kldp, kldq

In [None]:
# Compare the two constraint types as the ground set grows.
# Explicit figure/axes objects keep the cell independent of pyplot's
# implicit "current figure" state.
fig, ax = plt.subplots()
ax.plot(nn, kldp, marker="o", label="partition constraint")
ax.plot(nn, kldq, marker="o", label="quota constraint")
ax.set_xlabel("|V|")
ax.set_ylabel("KL divergence")  # the y-axis was previously unlabelled
# The original title stopped at "... distributions over"; completed here.
ax.set_title("KL divergence of uniform distributions over constrained subsets")
ax.set_xlim([-10, 510])
ax.legend();  # trailing ';' keeps the Legend repr out of the cell output

### KL divergence -- DPP

In [None]:
def kld_part_uniform(n, k, groupcounts, groupbudgets):
    r""" Computes D_{KL}(\rho^{\textrm{part}}||\rho^{\textrm{card}})

        Returns log( binom(n, k) / Zpart ), where Zpart is the product over
        groups of binom(groupcounts[i], groupbudgets[i]).

        NOTE(review): that product counts selections with exactly
        groupbudgets[i] elements from group i, i.e. sets of size
        sum(groupbudgets).  The checks below only require
        sum(groupbudgets) <= k, so for a strict inequality those sets do
        not have size k -- confirm the intended definition.

        n            : ground-set size; must equal sum(groupcounts)
        k            : total selection size; must be >= sum(groupbudgets)
        groupcounts  : per-group sizes, indexable by group
        groupbudgets : per-group budgets; must support elementwise `<=`
                       against groupcounts (the notebook passes numpy arrays)

        Raises AssertionError if any of the input checks fails.
    """

    assert np.sum(groupcounts) == n, "group assignment not adding up"
    assert np.sum(groupbudgets) <= k, "group budgets exceeding total"
    assert (groupbudgets <= groupcounts).all(), "not enough members"

    # number of ways to pick exactly the budgeted amount from every group
    Zpart = 1
    for gidx, budget in enumerate(groupbudgets):
        Zpart *= spec.binom(groupcounts[gidx], budget)

    # number of size-k subsets of the whole ground set
    n_card = spec.binom(n, k)
    return np.log(n_card / Zpart)

In [None]:
def kld_quota_uniform(n, k, groupcounts, groupbudgets):
    r""" Computes D_{KL}(\rho^{\textrm{quota}}||\rho^{\textrm{card}})

        Returns log( binom(n, k) / Zquota ).  Zquota sums, over every way of
        distributing the k - sum(groupbudgets) "free" places among the p
        groups, the product of binom(groupcounts[i], groupbudgets[i] + extra_i).

        n            : ground-set size; must equal sum(groupcounts)
        k            : total selection size; must be >= sum(groupbudgets)
        groupcounts  : per-group sizes
        groupbudgets : per-group minimum budgets; must support elementwise
                       `<=` against groupcounts (the notebook passes numpy
                       arrays)

        Works for a single group as well (p == 1), where Zquota equals
        binom(n, k) and the result is 0; the previous indexing of the bar
        tuple raised IndexError in that case.

        Raises AssertionError if any of the input checks fails.
    """

    assert np.sum(groupcounts) == n, "group assignment not adding up"
    assert np.sum(groupbudgets) <= k, "group budgets exceeding total"
    assert (groupbudgets <= groupcounts).all(), "not enough members"

    p = len(groupbudgets)
    ktilda = k - np.sum(groupbudgets)  # places left after the budgets are met
    nslots = ktilda + p - 1            # stars-and-bars: ktilda stars, p-1 bars
    budgets = np.asarray(groupbudgets)

    Zquota = 0
    for spl in itt.combinations(np.arange(nslots), p-1):
        # Sentinel bars at -1 and nslots turn "stars before the first bar",
        # "between bars" and "after the last bar" into one uniform gap
        # computation, which also covers the bar-less case p == 1.
        edges = np.array((-1,) + spl + (nslots,))
        kspl = budgets + (np.diff(edges) - 1)

        assert np.sum(kspl) == k, "split assignment not adding up"

        # ways to realise this split: independent choices within each group
        Zspl = 1
        for jj in range(p):
            Zspl *= spec.binom(groupcounts[jj], kspl[jj])

        Zquota += Zspl

    return np.log( spec.binom(n,k) / Zquota)

The offline problem...<br>

Datasets: just use self-curated ones (obtain features; scoring done by ROUGE or by a learnt submodular function)

Define groups ourselves (visually diverse spaces as groups, or some features (color, gender of humans) correlating with groups, or entirely random groups)<br>
and test the performance of greedy variants

The online problem... in particular, drawing fair samples from a distribution<br>

I am close to showing that the ratio of the counts of allowed