In [1]:
import sys
import os

# Make the repository root importable so the `src.votekit` imports in this
# cell resolve. The root is taken to be two directories above the current
# working directory; change the number of `os.pardir` hops if that differs.
repo_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if repo_path not in sys.path:
    # Prepend so this checkout is found before any other copy on the path.
    sys.path.insert(0, repo_path)

from src.votekit.ballot_generator import name_BradleyTerry 
from src.votekit.pref_interval import PreferenceInterval
from src.votekit.pref_profile import PreferenceProfile
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np

In [5]:
def generate_BT_args(n_cands, alpha=1):
    '''
        Builds the keyword arguments for name_BradleyTerry for a single bloc
        whose voters rank a single slate of n_cands candidates.

        Args:
            n_cands: number of candidates. Candidates are named with
                consecutive characters starting at 'a', so values up to 26
                give 'a' through 'z'.
            alpha: forwarded to PreferenceInterval.from_dirichlet (default 1).

        Returns:
            dict with the keys "candidates", "pref_intervals_by_bloc",
            "bloc_voter_prop" and "cohesion_parameters".
    '''
    bloc = "H"

    # Single-character candidate names: 'a', 'b', 'c', ...
    first_code = ord('a')
    slate = list(map(chr, range(first_code, first_code + n_cands)))

    interval = PreferenceInterval.from_dirichlet(candidates=slate, alpha=alpha)

    # One bloc holding every voter, fully cohesive toward its own slate.
    return {
        "candidates": slate,
        "pref_intervals_by_bloc": {bloc: {bloc: interval}},
        "bloc_voter_prop": {bloc: 1},
        "cohesion_parameters": {bloc: {bloc: 1}},
    }

In [3]:
# Set-up
number_of_ballots = 500
chain_length = 10000  # not referenced in this cell; kept in case other cells use it

candidates = ["W1", "W2", "C1", "C2"]
pref_intervals_by_bloc = {
    "W": {
        "W": PreferenceInterval({"W1": 0.4, "W2": 0.3}),
        "C": PreferenceInterval({"C1": 0.2, "C2": 0.1}),
    },
    "C": {
        "C": PreferenceInterval({"C1": 0.3, "C2": 0.3}),
        "W": PreferenceInterval({"W1": 0.2, "W2": 0.2}),
    },
}
bloc_voter_prop = {"W": 0.7, "C": 0.3}
cohesion_parameters = {"W": {"W": 0.7, "C": 0.3}, "C": {"C": 0.6, "W": 0.4}}

# Generate ballots
bt = name_BradleyTerry(
    candidates=candidates,
    pref_intervals_by_bloc=pref_intervals_by_bloc,
    bloc_voter_prop=bloc_voter_prop,
    cohesion_parameters=cohesion_parameters,
)

# Even if you don't specify the chain length, you should have a well behaved object

# Three cases

# Case 1: small request, no explicit chain_length.
generated_profile = bt.generate_profile_MCMC_even_subsample(number_of_ballots=number_of_ballots)
assert generated_profile.total_ballot_wt == number_of_ballots

# Case 2: large request, no explicit chain_length.
number_of_ballots = 100000
generated_profile = bt.generate_profile_MCMC_even_subsample(number_of_ballots=number_of_ballots)
assert generated_profile.total_ballot_wt == number_of_ballots

# Case 3: more ballots requested than the supplied chain_length.
# The recorded run of this cell raised ValueError here ("The number of ballots
# to be sampled is more than the chain length; supply a greater chain length."),
# which aborted the cell. Separate names are used so that `number_of_ballots`
# and `generated_profile` keep describing the last successful sample, and both
# outcomes are handled so the cell runs to completion.
oversized_request = 100001
try:
    oversized_profile = bt.generate_profile_MCMC_even_subsample(
        number_of_ballots=oversized_request, chain_length=100
    )
except ValueError as err:
    print(f"ValueError: {err}")
else:
    # If the sampler accepts the request, it must still honour the ballot count.
    assert oversized_profile.total_ballot_wt == oversized_request


# Total ballot weight of the profile should be number_of_ballots, not chain length

# chain length < number_of_ballots --> intended to resort to number_of_ballots;
# the recorded run raised ValueError instead (see case 3)

# Continuous sampling should do worse than spaced out subsampling w.h.p.

# Acceptance ratio should roughly be between two values

The number of ballots before is 100000
The number of ballots after is 350
The number of ballots before is 100000
The number of ballots after is 150
The number of ballots before is 100000
The number of ballots after is 70000
The number of ballots before is 100000
The number of ballots after is 30000


ValueError: The number of ballots to be sampled is more than the chain length; supply a greater chain length.

In [4]:
def total_variation_distance(P, Q, labels):
    '''
        Total variation distance between P and Q over the given labels:
        half of the summed absolute differences |P[x] - Q[x]|.

        Args:
            P: indexable object giving a numeric value for each label.
            Q: indexable object giving a numeric value for each label.
            labels: iterable of the keys to compare.

        Returns:
            0.5 * sum(|P[x] - Q[x]| for x in labels); 0.0 if labels is empty.
    '''
    gaps = (abs(P[label] - Q[label]) for label in labels)
    return 0.5 * sum(gaps)

In [None]:
# Display the total ballot weight of the most recently generated profile.
generated_profile.total_ballot_wt

In [16]:
# Sanity check: the profile's total ballot weight must equal `number_of_ballots`.
assert generated_profile.total_ballot_wt == number_of_ballots