Merge pull request #127 from cdonnay/update_CS

Update CS blocs
mggg · Jul 3, 2024 · fc3dedc · fc3dedc
2 parents 2cfe619 + eb16eab
commit fc3dedc
Show file tree

Hide file tree

Showing 6 changed files with 170 additions and 29 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## Added
 - Created a read the docs page.
 - Add `scale` parameter to `ballot_graph.draw()` to allow for easier reading of text labels.
+- Allow users to choose which bloc is W/C in historical Cambridge data for CambridgeSampler.
 
 ## Changed
 - Updated tutorial notebooks; larger focus on slate models, updated notebooks to match current codebase.

diff --git a/docs/social_choice_docs/scr.rst b/docs/social_choice_docs/scr.rst
@@ -369,7 +369,7 @@ candidates go in the slots.
 
 -  You can give the CS model other historical election data to use.
 
-Distance Models
+Spatial Models
 ---------------
 
 1-D Spatial
@@ -503,7 +503,7 @@ Cumulative
 Voting system where voters are allowed to vote for candidates with multiplicity.
 Each ranking position should have one candidate, and every candidate ranked will receive
 one point, i.e., the score vector is :math:`(1,\dots,1)`. Recall a score vector is a 
-vector whose :math:`i`th entry denotes the number of points given to a candidate in 
+vector whose :math:`i` th entry denotes the number of points given to a candidate in 
 position :math:`i`. Normally a score vector is non-negative and decreasing.
 
 Distances between PreferenceProfiles

diff --git a/src/votekit/ballot_generator.py b/src/votekit/ballot_generator.py
@@ -1154,8 +1154,9 @@ class CambridgeSampler(BallotGenerator):
     """
     Class for generating ballots based on historical RCV elections occurring
     in Cambridge, MA. Alternative election data can be used if specified. Assumes that there are two
-    blocs, a majority and a minority bloc, and determines this based on the ``bloc_voter_prop``
-    attr.
+    blocs, a W bloc and a C bloc, which correspond to the historical Cambridge data.
+    By default, it assigns the W bloc to the majority bloc and C to the minority, but this
+    can be changed.
 
     Based on cohesion parameters, decides if a voter casts their top choice within their bloc
     or in the opposing bloc. Then uses historical data; given their first choice, choose a
@@ -1171,12 +1172,16 @@ class CambridgeSampler(BallotGenerator):
         cohesion_parameters (dict): Dictionary mapping of bloc string to dictionary whose
             keys are bloc strings and values are cohesion parameters,
             eg. ``{'bloc_1': {'bloc_1': .7, 'bloc_2': .2, 'bloc_3':.1}}``
-        historical_majority (str): Name of majority bloc in historical data, defaults to W for
-            Cambridge data.
-        historical_minority (str): Name of minority bloc in historical data, defaults to C for
-            Cambridge data.
-        path (str): File path to an election data file to sample from. Defaults to Cambridge
-            elections.
+        W_bloc (str, optional): Name of the bloc corresponding to the W bloc. Defaults to
+            the bloc holding the majority share in ``bloc_voter_prop``.
+        C_bloc (str, optional): Name of the bloc corresponding to the C bloc. Defaults to
+            the bloc holding the minority share in ``bloc_voter_prop``.
+        historical_majority (str, optional): Name of majority bloc in historical data, defaults to W
+            for Cambridge data.
+        historical_minority (str, optional): Name of minority bloc in historical data, defaults to C
+            for Cambridge data.
+        path (str, optional): File path to an election data file to sample from. Defaults to
+            Cambridge elections.
 
     Attributes:
         candidates (list): List of candidate strings.
@@ -1189,14 +1194,10 @@ class CambridgeSampler(BallotGenerator):
         cohesion_parameters (dict): Dictionary mapping of bloc string to dictionary whose
             keys are bloc strings and values are cohesion parameters,
             eg. ``{'bloc_1': {'bloc_1': .7, 'bloc_2': .2, 'bloc_3':.1}}``
-        historical_majority (str): Name of majority bloc in historical data, defaults to W for
-            Cambridge data.
-        historical_minority (str): Name of minority bloc in historical data, defaults to C for
-            Cambridge data.
-        majority_bloc (str): The name of the bloc determined to be the majority by
-            ``bloc_voter_prop``.
-        minority_bloc (str): The name of the bloc determined to be the minority by
-            ``bloc_voter_prop``.
+        W_bloc (str): The name of the W bloc.
+        C_bloc (str): The name of the C bloc.
+        historical_majority (str): Name of majority bloc in historical data.
+        historical_minority (str): Name of minority bloc in historical data.
         path (str): File path to an election data file to sample from. Defaults to Cambridge
             elections.
         bloc_to_historical (dict): Dictionary which converts bloc names to historical bloc names.
@@ -1206,6 +1207,8 @@ def __init__(
         self,
         cohesion_parameters: dict,
         path: Optional[Path] = None,
+        W_bloc: Optional[str] = None,
+        C_bloc: Optional[str] = None,
         historical_majority: Optional[str] = "W",
         historical_minority: Optional[str] = "C",
         **data,
@@ -1222,17 +1225,32 @@ def __init__(
                               passed {len(self.slate_to_candidates.keys())}"
             )
 
-        self.majority_bloc = [
-            bloc for bloc, prop in self.bloc_voter_prop.items() if prop >= 0.5
-        ][0]
+        if (W_bloc is None) != (C_bloc is None):
+            raise ValueError(
+                "Both W_bloc and C_bloc must be provided or not provided. \
+                             You have provided only one."
+            )
+
+        elif W_bloc is not None and W_bloc == C_bloc:
+            raise ValueError("W and C bloc must be distinct.")
+
+        if W_bloc is None:
+            self.W_bloc = [
+                bloc for bloc, prop in self.bloc_voter_prop.items() if prop >= 0.5
+            ][0]
+        else:
+            self.W_bloc = W_bloc
 
-        self.minority_bloc = [
-            bloc for bloc in self.bloc_voter_prop.keys() if bloc != self.majority_bloc
-        ][0]
+        if C_bloc is None:
+            self.C_bloc = [
+                bloc for bloc in self.bloc_voter_prop.keys() if bloc != self.W_bloc
+            ][0]
+        else:
+            self.C_bloc = C_bloc
 
         self.bloc_to_historical = {
-            self.majority_bloc: self.historical_majority,
-            self.minority_bloc: self.historical_minority,
+            self.W_bloc: self.historical_majority,
+            self.C_bloc: self.historical_minority,
         }
 
         if path:

diff --git a/tests/test_bg_errors.py b/tests/test_bg_errors.py
@@ -1,8 +1,6 @@
 import pytest
 
-from votekit.ballot_generator import (
-    name_PlackettLuce,
-)
+from votekit.ballot_generator import name_PlackettLuce, CambridgeSampler
 
 from votekit.pref_interval import PreferenceInterval
 
@@ -63,3 +61,45 @@ def test_incorrect_bloc_props():
             cohesion_parameters=cohesion,
             alphas=alphas,
         )
+
+
+def test_Cambridge_maj_bloc_error():
+    # need to provide both W_bloc and C_bloc
+    with pytest.raises(ValueError):
+        CambridgeSampler(
+            candidates=["W1", "W2", "C1", "C2"],
+            slate_to_candidates={"A": ["W1", "W2"], "B": ["C1", "C2"]},
+            pref_intervals_by_bloc={
+                "A": {
+                    "A": PreferenceInterval({"W1": 0.4, "W2": 0.3}),
+                    "B": PreferenceInterval({"C1": 0.2, "C2": 0.1}),
+                },
+                "B": {
+                    "A": PreferenceInterval({"W1": 0.2, "W2": 0.2}),
+                    "B": PreferenceInterval({"C1": 0.3, "C2": 0.3}),
+                },
+            },
+            bloc_voter_prop={"A": 0.7, "B": 0.3},
+            cohesion_parameters={"A": {"A": 0.7, "B": 0.3}, "B": {"B": 0.9, "A": 0.1}},
+            W_bloc="A",
+        )
+    # must be distinct
+    with pytest.raises(ValueError):
+        CambridgeSampler(
+            candidates=["W1", "W2", "C1", "C2"],
+            slate_to_candidates={"A": ["W1", "W2"], "B": ["C1", "C2"]},
+            pref_intervals_by_bloc={
+                "A": {
+                    "A": PreferenceInterval({"W1": 0.4, "W2": 0.3}),
+                    "B": PreferenceInterval({"C1": 0.2, "C2": 0.1}),
+                },
+                "B": {
+                    "A": PreferenceInterval({"W1": 0.2, "W2": 0.2}),
+                    "B": PreferenceInterval({"C1": 0.3, "C2": 0.3}),
+                },
+            },
+            bloc_voter_prop={"A": 0.7, "B": 0.3},
+            cohesion_parameters={"A": {"A": 0.7, "B": 0.3}, "B": {"B": 0.9, "A": 0.1}},
+            W_bloc="A",
+            C_bloc="A",
+        )
diff --git a/tests/test_bg_from_init.py b/tests/test_bg_from_init.py
@@ -299,6 +299,68 @@ def test_Cambridge_completion():
     assert agg_prof.num_ballots() == 100
 
 
+def test_Cambridge_completion_W_C_bloc():
+    # W as majority
+    cs = CambridgeSampler(
+        candidates=["W1", "W2", "C1", "C2"],
+        slate_to_candidates={"A": ["W1", "W2"], "B": ["C1", "C2"]},
+        pref_intervals_by_bloc={
+            "A": {
+                "A": PreferenceInterval({"W1": 0.4, "W2": 0.3}),
+                "B": PreferenceInterval({"C1": 0.2, "C2": 0.1}),
+            },
+            "B": {
+                "A": PreferenceInterval({"W1": 0.2, "W2": 0.2}),
+                "B": PreferenceInterval({"C1": 0.3, "C2": 0.3}),
+            },
+        },
+        bloc_voter_prop={"A": 0.7, "B": 0.3},
+        cohesion_parameters={"A": {"A": 0.7, "B": 0.3}, "B": {"B": 0.9, "A": 0.1}},
+        W_bloc="A",
+        C_bloc="B",
+    )
+    profile = cs.generate_profile(number_of_ballots=100)
+    assert type(profile) is PreferenceProfile
+
+    result = cs.generate_profile(number_of_ballots=100, by_bloc=True)
+    assert type(result) is tuple
+    profile_dict, agg_prof = result
+    assert isinstance(profile_dict, dict)
+    assert (type(profile_dict["A"])) is PreferenceProfile
+    assert type(agg_prof) is PreferenceProfile
+    assert agg_prof.num_ballots() == 100
+
+    # W as minority
+    cs = CambridgeSampler(
+        candidates=["W1", "W2", "C1", "C2"],
+        slate_to_candidates={"A": ["W1", "W2"], "B": ["C1", "C2"]},
+        pref_intervals_by_bloc={
+            "A": {
+                "A": PreferenceInterval({"W1": 0.4, "W2": 0.3}),
+                "B": PreferenceInterval({"C1": 0.2, "C2": 0.1}),
+            },
+            "B": {
+                "A": PreferenceInterval({"W1": 0.2, "W2": 0.2}),
+                "B": PreferenceInterval({"C1": 0.3, "C2": 0.3}),
+            },
+        },
+        bloc_voter_prop={"A": 0.7, "B": 0.3},
+        cohesion_parameters={"A": {"A": 0.7, "B": 0.3}, "B": {"B": 0.9, "A": 0.1}},
+        W_bloc="B",
+        C_bloc="A",
+    )
+    profile = cs.generate_profile(number_of_ballots=100)
+    assert type(profile) is PreferenceProfile
+
+    result = cs.generate_profile(number_of_ballots=100, by_bloc=True)
+    assert type(result) is tuple
+    profile_dict, agg_prof = result
+    assert isinstance(profile_dict, dict)
+    assert (type(profile_dict["A"])) is PreferenceProfile
+    assert type(agg_prof) is PreferenceProfile
+    assert agg_prof.num_ballots() == 100
+
+
 def test_ballot_simplex_from_point():
     candidates = ["W1", "W2", "C1", "C2"]
     pt = {"W1": 1 / 4, "W2": 1 / 4, "C1": 1 / 4, "C2": 1 / 4}

diff --git a/tests/test_bg_from_params.py b/tests/test_bg_from_params.py
@@ -201,6 +201,26 @@ def test_CS_from_params():
     profile = cs.generate_profile(3)
     assert type(profile) is PreferenceProfile
 
+    # check that W, C bloc assignments work
+    cs = CambridgeSampler.from_params(
+        bloc_voter_prop=blocs,
+        alphas=alphas,
+        slate_to_candidates=slate_to_cands,
+        cohesion_parameters=cohesion_parameters,
+        W_bloc="R",
+        C_bloc="D",
+    )
+
+    # check if intervals add up to one
+    assert all(
+        math.isclose(sum(cs.pref_intervals_by_bloc[curr_bloc][b].interval.values()), 1)
+        for curr_bloc in blocs.keys()
+        for b in blocs.keys()
+    )
+
+    profile = cs.generate_profile(3)
+    assert type(profile) is PreferenceProfile
+
 
 def test_interval_sum_from_params():
     blocs = {"R": 0.6, "D": 0.4}