##**Gerrymandering-Environment**

    INITIAL STATE (provided externally via reset(options=...)):
        - 'district_map'
        - 'social_graph'
        - 'opinions'     

    ACTION:
        - new district assignment for each voter

    OBSERVATION (returned by reset/step):
        {
          'district_map'   : (num_voters,)
          'representatives': (num_districts,)  # voter indices; -1 if empty district
          'social_graph'   : (num_voters, num_voters)  # AUGMENTED: base social + rep->voter edges used for the step
          'opinions'       : (num_voters, 2)
          'opinion_graph'  : (num_voters, num_voters)  # similarity kernel derived from opinion distances
        }

    KEY LOGIC:
      - Representatives: for each district, pick the member that minimizes the sum of L2 distances to members in that district (discrete 1-median).
      - Opinion dynamics: DRF (assimilation/neutral/backfire) with weighted neighbor influence.
      - Reward: reduction in total distance to reference opinion c*

    Notes:
      - Opinion weight = 1
      - Opinion dimension is fixed at 2.
      - We accept any districting action

In [38]:
!pip install torch_geometric



In [39]:
import numpy as np
import torch
from torch_geometric.data import Data
import gymnasium as gym
from gymnasium import spaces

In [72]:
class FrankenmanderingEnv(gym.Env):
    metadata = {"render_modes": ["human"]}

    def __init__(
        self,
        num_voters: int,
        num_districts: int,
        opinion_dim: int = 2,
        horizon: int = 10,
        seed: int | None = None,

        # opinion dynamics
        eta: float = 0.2,
        a_thresh: float = 0.4,
        b_thresh: float = 1.2,
        mu_assim: float = 1.0,
        mu_backfire: float = -0.5,

        # representative influence
        rep_edge_weight: float = 1.0,

        # reward target
        reference_opinion: np.ndarray | None = None,
    ):
        super().__init__()
        self.num_voters = int(num_voters)
        self.num_districts = int(num_districts)
        self.opinion_dim = int(opinion_dim)
        self.horizon = int(horizon)
        self.rng = np.random.default_rng(seed)

        # dynamics params
        self.eta = float(eta)
        self.a_thresh = float(a_thresh)
        self.b_thresh = float(b_thresh)
        self.mu_assim = float(mu_assim)
        self.mu_backfire = float(mu_backfire)
        self.rep_edge_weight = float(rep_edge_weight)

        # target opinion c*
        if reference_opinion is None:
            self.c_star = np.zeros(self.opinion_dim, dtype=np.float32)
        else:
            self.c_star = np.asarray(reference_opinion, dtype=np.float32).reshape(self.opinion_dim)

        # spaces
        self.action_space = spaces.Box(low=0.0, high=1.0, shape=(self.num_voters, self.num_districts), dtype=np.float32)
        # observation_space is symbolic; real obs is a PyG Data
        self.observation_space = spaces.Dict({
            "x": spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_voters, self.opinion_dim), dtype=np.float32),
            "y": spaces.Box(low=0, high=self.num_districts-1, shape=(self.num_voters,), dtype=np.int64),
        })

        # state
        self.t = 0
        self._x = None
        self._pos = None
        self._edge_index = None
        self._edge_attr = None
        self._assignment = None
        self._y = None
        self._reps = None


    def reset(self, seed: int | None = None, options: dict | None = None):
        super().reset(seed=seed)
        self.t = 0

        if options is None:
            raise ValueError("reset(options=...) must provide 'opinions', 'pos', and either 'edge_index' or 'social_adj'.")

        x = np.asarray(options["opinions"], dtype=np.float32)
        pos = np.asarray(options["pos"], dtype=np.float32)
        if x.shape != (self.num_voters, self.opinion_dim):
            raise ValueError("opinions shape mismatch")
        if pos.shape != (self.num_voters, 2):
            raise ValueError("pos must be (N,2)")

        if "edge_index" in options:
            edge_index = np.asarray(options["edge_index"], dtype=np.int64)
            edge_attr = np.asarray(options.get("edge_attr", np.ones(edge_index.shape[1])), dtype=np.float32)
        else:
            adj = np.asarray(options["social_adj"], dtype=np.float32)
            edge_index, edge_attr = self._adj_to_coo(adj)

        assignment = options.get("assignment", np.full((self.num_voters, self.num_districts),
                                                      1.0 / self.num_districts, dtype=np.float32))
        assignment = self._row_normalize(assignment)
        assignment = self._ensure_non_empty_soft(assignment)

        y = assignment.argmax(axis=1).astype(np.int64)
        reps = self._elect_representatives_from_labels(y, x)

        # state
        self._x, self._pos = x, pos
        self._edge_index, self._edge_attr = edge_index, edge_attr
        self._assignment, self._y, self._reps = assignment, y, reps

        return self.as_pyg_data(), {}


    def step(self, action: np.ndarray):
      assignment, y = self.mcmc_env(action, num_steps=10)

      # Elect representatives based on final assignment
      reps = self._elect_representatives_from_labels(y, self._x)
      edge_index_aug, edge_attr_aug = self._augment_with_reps(self._edge_index, self._edge_attr, reps, y)

      # Update opinions
      x_new = self._opinion_update(edge_index_aug, edge_attr_aug, self._x)
      reward = self._reward(self._x, x_new)

      # Commit changes
      self._assignment, self._y, self._reps = assignment, y, reps
      self._x, self._edge_index, self._edge_attr = x_new, edge_index_aug, edge_attr_aug

      self.t += 1
      terminated = self.t >= self.horizon
      return self.as_pyg_data(), float(reward), terminated, False, {}


    def render(self, mode="human"):
        mean = self._x.mean(axis=0)
        print(f"[t={self.t}] mean opinion ≈ {tuple(np.round(mean,3))}")


    def as_pyg_data(self) -> Data:
        return Data(
            x=torch.tensor(self._x, dtype=torch.float32),
            y=torch.tensor(self._y, dtype=torch.long),
            pos=torch.tensor(self._pos, dtype=torch.float32),
            edge_index=torch.tensor(self._edge_index, dtype=torch.long),
            edge_attr=torch.tensor(self._edge_attr, dtype=torch.float32),
            assignment=torch.tensor(self._assignment, dtype=torch.float32),
            reps=torch.tensor([r if r is not None else -1 for r in self._reps], dtype=torch.long),
        )

    @staticmethod
    def _row_normalize(A: np.ndarray, eps=1e-8) -> np.ndarray:
        row_sum = A.sum(axis=1, keepdims=True)
        row_sum = np.clip(row_sum, eps, None)
        return A / row_sum

    def _ensure_non_empty_soft(self, A: np.ndarray) -> np.ndarray:
        y = A.argmax(axis=1)
        counts = np.bincount(y, minlength=self.num_districts)
        for d in range(self.num_districts):
            if counts[d] == 0:
                donor = counts.argmax()
                donor_nodes = np.where(y == donor)[0]
                weakest = donor_nodes[np.argmin(A[donor_nodes, donor])]
                A[weakest,:] = 0.0
                A[weakest,d] = 1.0
                y[weakest] = d
                counts = np.bincount(y, minlength=self.num_districts)
        return self._row_normalize(A)

    @staticmethod
    def _adj_to_coo(adj: np.ndarray):
        row, col = np.nonzero(adj)
        edge_index = np.vstack([row, col])
        edge_attr = adj[row, col]
        return edge_index, edge_attr

    def _elect_representatives_from_labels(self, y: np.ndarray, X: np.ndarray):
        reps = [None]*self.num_districts
        dif = X[:,None,:] - X[None,:,:]
        dists = np.linalg.norm(dif,axis=2)
        for d in range(self.num_districts):
            members = np.where(y==d)[0]
            if len(members)==0: continue
            sums = dists[np.ix_(members,members)].sum(axis=1)
            reps[d] = int(members[np.argmin(sums)])
        return reps

    def _augment_with_reps(self, edge_index, edge_attr, reps, y):
        add_src, add_dst, add_w = [], [], []
        for d, r in enumerate(reps):
            if r is None: continue
            members = np.where(y==d)[0]
            for v in members:
                if v==r: continue
                add_src.append(r)
                add_dst.append(v)
                add_w.append(self.rep_edge_weight)
        if not add_src:
            return edge_index, edge_attr
        new_ei = np.concatenate([edge_index, np.vstack([add_src, add_dst])], axis=1)
        new_ea = np.concatenate([edge_attr, np.array(add_w,dtype=np.float32)], axis=0)
        return new_ei,new_ea

    def _opinion_update(self, edge_index, edge_attr, X):
        src,dst = edge_index
        dif = X[src]-X[dst]
        dist = np.linalg.norm(dif,axis=1)
        gain = self._drf_gain(dist)
        contrib = (gain*edge_attr)[:,None]*dif
        delta = np.zeros_like(X)
        np.add.at(delta,dst,contrib)
        return X+self.eta*delta

    def _drf_gain(self, d):
        g=np.zeros_like(d,dtype=np.float32)
        g[d<=self.a_thresh]=self.mu_assim
        mid=(d>self.a_thresh)&(d<=self.b_thresh)
        g[mid]=0.0
        g[d>self.b_thresh]=self.mu_backfire
        return g

    def _reward(self, oldX,newX):
        old_d=np.linalg.norm(oldX-self.c_star[None,:],axis=1).sum()
        new_d=np.linalg.norm(newX-self.c_star[None,:],axis=1).sum()
        return old_d-new_d

    # In this approach, mcmc correct the assignment and return the opinion
    def mcmc_env(self, action: np.ndarray, num_steps: int = 10, check_steps: int = 5):

      current_assignment = self._row_normalize(action)
      current_assignment = self._ensure_non_empty_soft(current_assignment)
      current_y = current_assignment.argmax(axis=1)

      reps = self._elect_representatives_from_labels(current_y, self._x)
      edge_index_aug, edge_attr_aug = self._augment_with_reps(self._edge_index, self._edge_attr, reps, current_y)
      x_new = self._opinion_update(edge_index_aug, edge_attr_aug, self._x)
      current_reward = self._reward(self._x, x_new)

      for _ in range(num_steps):
          proposal = current_assignment.copy()
          voter = self.rng.integers(0, self.num_voters)
          new_d = self.rng.integers(0, self.num_districts)
          proposal[voter,:] = 0
          proposal[voter,new_d] = 1.0
          proposal = self._row_normalize(proposal)
          proposal = self._ensure_non_empty_soft(proposal)
          proposal_y = proposal.argmax(axis=1)

          check_assignment = proposal.copy()
          check_y = proposal_y.copy()
          check_reward = current_reward

          for _ in range(check_steps):
              temp = check_assignment.copy()
              v = self.rng.integers(0, self.num_voters)
              d = self.rng.integers(0, self.num_districts)
              temp[v,:] = 0
              temp[v,d] = 1.0
              temp = self._row_normalize(temp)
              temp = self._ensure_non_empty_soft(temp)
              temp_y = temp.argmax(axis=1)

              reps = self._elect_representatives_from_labels(temp_y, self._x)
              edge_index_aug, edge_attr_aug = self._augment_with_reps(self._edge_index, self._edge_attr, reps, temp_y)
              x_new = self._opinion_update(edge_index_aug, edge_attr_aug, self._x)
              temp_reward = self._reward(self._x, x_new)

              if temp_reward > check_reward:
                  check_assignment, check_y, check_reward = temp, temp_y, temp_reward

          if check_reward > current_reward:
              current_assignment, current_y, current_reward = check_assignment, check_y, check_reward

      return current_assignment, current_y


In [73]:
N = 16
K = 4
T = 8

opinions = np.linspace(-1,1,N)[:,None]
opinions = np.hstack([opinions, np.zeros((N,1))])
pos = np.arange(N)[:,None]
pos = np.hstack([pos, np.zeros_like(pos)])

edges = [(i,i+1) for i in range(N-1)]
edge_index = np.array(edges + [(j,i) for i,j in edges]).T
edge_attr = np.ones(edge_index.shape[1])

env = FrankenmanderingEnv(num_voters=N, num_districts=K, opinion_dim=2, horizon=T)

obs,_ = env.reset(options={
    "opinions": opinions,
    "pos": pos,
    "edge_index": edge_index,
    "edge_attr": edge_attr
})

for t in range(T):
    action = np.ones((N,K), dtype=np.float32)/K
    obs, reward, done, _, _ = env.step(action)
    print(f"t={t}, reward={reward:.3f}, mean opinion={obs.x.mean(0)}")


t=0, reward=0.347, mean opinion=tensor([0.0017, 0.0000])
t=1, reward=0.597, mean opinion=tensor([-0.0037,  0.0000])
t=2, reward=0.553, mean opinion=tensor([-0.0097,  0.0000])
t=3, reward=0.456, mean opinion=tensor([-0.0176,  0.0000])
t=4, reward=0.584, mean opinion=tensor([-0.0272,  0.0000])
t=5, reward=0.501, mean opinion=tensor([-0.0254,  0.0000])
t=6, reward=0.084, mean opinion=tensor([-0.0137,  0.0000])
t=7, reward=0.233, mean opinion=tensor([-0.0111,  0.0000])
