## **Gerrymandering-Environment**

    INITIAL STATE (provided externally via reset(options=...)):
        - 'district_map'
        - 'social_graph'
        - 'opinions'     

    ACTION:
        - new district assignment for each voter

    OBSERVATION (returned by reset/step):
        {
          'district_map'   : (num_voters,)
          'representatives': (num_districts,)  # voter indices; -1 if empty district
          'social_graph'   : (num_voters, num_voters)  # AUGMENTED: base social + rep->voter edges used for the step
          'opinions'       : (num_voters, 2)
          'opinion_graph'  : (num_voters, num_voters)  # similarity kernel derived from opinion distances
        }

    KEY LOGIC:
      - Representatives: for each district, pick the member that minimizes the sum of L2 distances to members in that district (discrete 1-median).
      - Opinion dynamics: DRF (assimilation/neutral/backfire) with weighted neighbor influence.
      - Reward: reduction in total distance to reference opinion c*.

    Notes:
      - Opinion weight = 1
      - Opinion dimension is fixed at 2.
      - We accept any districting action

In [1]:
!pip install torch_geometric



In [2]:
import numpy as np
import torch
from torch_geometric.data import Data
import gymnasium as gym
from gymnasium import spaces
import gerry_environment

In [3]:
def _evaluate_assignment(env, assignment, opinions):
    """Score a one-hot district assignment without stepping the environment.

    Runs the same pipeline for every candidate: derive district labels,
    elect representatives, augment the graph with representative edges,
    apply one opinion update, and ask the environment for the reward.

    Args:
        env: Environment exposing `_elect_representatives_from_labels`,
            `_augment_with_reps`, `_opinion_update`, `_reward`,
            `_edge_index` and `_edge_attr`.
        assignment: 2-D array whose row-wise argmax is the district label
            of each voter.
        opinions: Opinions the update starts from.

    Returns:
        Whatever `env._reward` returns for the simulated update.
    """
    labels = assignment.argmax(axis=1)
    reps = env._elect_representatives_from_labels(labels, opinions)
    edge_index_aug, edge_attr_aug = env._augment_with_reps(
        env._edge_index, env._edge_attr, reps, labels
    )
    updated_opinions = env._opinion_update(edge_index_aug, edge_attr_aug, opinions)
    return env._reward(opinions, updated_opinions)


def simulated_annealing_baseline(env, num_iterations=500, initial_beta=0.1, cooling_rate=1.01, rng=None):
    """Search for a high-reward district assignment by simulated annealing.

    Starts from a random one-hot assignment and repeatedly proposes moving
    a single random voter to a random district. Every candidate is scored
    through the environment's private helpers against the opinions in
    `env._x`; `env.step` is never called, so the search itself does not
    advance the episode.

    Args:
        env: Environment providing `num_voters`, `num_districts`, `_x`,
            `_ensure_non_empty` and the helpers used for scoring.
        num_iterations: Number of proposals to evaluate.
        initial_beta: Starting inverse temperature.
        cooling_rate: Factor `beta` is multiplied by after each proposal;
            values above 1 make worse proposals progressively less likely
            to be accepted.
        rng: Optional `numpy.random.Generator`; a fresh default generator
            is created when omitted.

    Returns:
        Tuple `(best_assignment, best_reward)`: the highest-scoring
        assignment visited during the search and its score.
    """
    if rng is None:
        rng = np.random.default_rng()

    # Opinions are read once; every candidate is scored against this state.
    current_opinions = env._x

    # Random one-hot start, then let the environment repair empty districts.
    current_assignment = np.eye(env.num_districts)[rng.integers(0, env.num_districts, size=env.num_voters)]
    current_assignment = env._ensure_non_empty(current_assignment)
    current_reward = _evaluate_assignment(env, current_assignment, current_opinions)

    # Best state seen over the whole search (the chain may later move away).
    best_assignment = current_assignment.copy()
    best_reward = current_reward

    beta = initial_beta

    for _ in range(num_iterations):
        # 1. Propose: move one random voter to one random district.
        proposal_assignment = current_assignment.copy()
        voter = rng.integers(0, env.num_voters)
        new_d = rng.integers(0, env.num_districts)
        proposal_assignment[voter, :] = 0
        proposal_assignment[voter, new_d] = 1.0
        proposal_assignment = env._ensure_non_empty(proposal_assignment)

        # 2. Score the proposal.
        proposal_reward = _evaluate_assignment(env, proposal_assignment, current_opinions)

        # 3. Metropolis rule: improvements are always accepted; otherwise
        #    accept with probability exp(beta * delta). The `or`
        #    short-circuits, so a random number is drawn only for
        #    non-improving proposals.
        if proposal_reward > current_reward or rng.random() < np.exp(
            beta * (proposal_reward - current_reward)
        ):
            current_assignment = proposal_assignment
            current_reward = proposal_reward

            # 4. The current state only changes on acceptance, so this is
            #    the only place a new best can appear.
            if current_reward > best_reward:
                best_assignment = current_assignment.copy()
                best_reward = current_reward

        # 5. Anneal: raise beta (lower the temperature).
        beta *= cooling_rate

    return best_assignment, best_reward

In [4]:
# Demo configuration.
num_voters = 10
num_districts = 8
T = 8  # passed to the environment as `horizon`
triple_size = 3
opinion_dim = 1

# First opinion coordinate is evenly spaced over [-1, 1]; any further
# coordinates stay at zero.
opinions = np.zeros(shape=(num_voters, opinion_dim))
opinions[:, 0] = np.linspace(start=-1, stop=1, num=num_voters)

# Voter positions: (index, 0) for every voter.
pos = np.column_stack((np.arange(num_voters), np.zeros(num_voters, dtype=int)))

# Path graph: each voter is linked to the next one, in both directions.
# All forward edges come first, followed by their reversed copies.
edges = list(zip(range(num_voters - 1), range(1, num_voters)))
edge_index = np.transpose(np.array(edges + [pair[::-1] for pair in edges]))
edge_attr = np.full(edge_index.shape[1], 1.0)  # one unit weight per directed edge

env = gerry_environment.FrankenmanderingEnv(
    num_voters,
    num_districts,
    opinion_dim,
    horizon=T,
)

# The initial state is supplied externally through reset(options=...).
obs, _ = env.reset(
    options={
        "opinions": opinions,
        "pos": pos,
        "edge_index": edge_index,
        "edge_attr": edge_attr,
    }
)

In [5]:
# Roll the episode forward for at most T iterations. Each iteration plans
# an assignment with simulated annealing, applies it, and may apply a
# second, re-planned assignment.
for t in range(T):
    # Plan against the environment's current state.
    best_assignment, best_reward = simulated_annealing_baseline(
        env,
        num_iterations=2000,
        initial_beta=0.1,
        cooling_rate=1.001
    )

    # Apply the planned assignment. Gymnasium's step returns
    # (obs, reward, terminated, truncated, info); either flag ends the
    # episode, so both are folded into `done`.
    obs, reward, terminated, truncated, _ = env.step(best_assignment)
    done = terminated or truncated

    # Only re-plan while the episode is still running: stepping an
    # environment that has already finished is not meaningful.
    if not done:
        refined_assignment, refined_reward = simulated_annealing_baseline(
            env,
            num_iterations=1000,
            initial_beta=0.1,
            cooling_rate=1.001
        )

        # NOTE(review): `refined_reward` is a predicted reward on the
        # post-step state, while `reward` is the realised reward of the step
        # just taken, so this compares values from different states. Taking
        # the branch also performs a second env.step within the same `t`,
        # so the environment can advance up to two steps per iteration.
        # Confirm this is intended.
        if refined_reward > reward:
            obs, reward, terminated, truncated, _ = env.step(refined_assignment)
            done = terminated or truncated

    print(f"t={t}, reward={reward:.3f}, mean opinion={obs.x.mean(0)}")

    if done:
        break


t=0, reward=3.111, mean opinion=tensor([-1.1921e-08])
t=1, reward=-0.222, mean opinion=tensor([0.])
t=2, reward=0.889, mean opinion=tensor([-0.2000])
t=3, reward=0.444, mean opinion=tensor([0.])
t=4, reward=-0.889, mean opinion=tensor([-0.2000])
