# Projected outcomes

This notebook contains the code we used to project outcomes under the 10x5, 10x3 and 50x1 districting plans using the demographic threshold inference model (see Section 5 of the report).

In [1]:
import pandas
import numpy
from collections import namedtuple
from tqdm import tqdm
import matplotlib.pyplot as plt
import random
import itertools
import operator

In [2]:
# Register tqdm's ``progress_apply`` on pandas objects; ElectionSampler.sample
# relies on it.
tqdm.pandas()

## Projection model code

In [3]:
# Groups tracked by the model and the one-letter code used for each (same
# order). Frames passed to ``encoded_rankings`` are expected to have their
# columns in this order.
groups = ["Asian", "Black", "Hispanic", "White"]
abbreviations = ["A", "B", "H", "W"]

# Lookup from a rank code to an ordering string. The rank code lists, in
# ``abbreviations`` order, the rank of each group (1 = largest share); the
# ordering string lists the group initials from largest to smallest.
# Example: "4123" (A is 4th, B 1st, H 2nd, W 3rd) -> "BHWA".
mapping = {
    "".join(str(ranking.index(initial) + 1) for initial in abbreviations): "".join(ranking)
    for ranking in itertools.permutations(abbreviations)
}


def encoded_rankings(data):
    """Return, for each row, the group initials ordered by decreasing share.

    :param pandas.DataFrame data: one column per group, in the same order as
        ``abbreviations``; each row holds the group shares of one ward.
    :returns: a Series (same index as ``data``) of ordering strings such as
        ``"BHWA"``.

    Ties are ranked in column order (``method="first"``). With the default
    averaging method a tie gives fractional ranks, which truncate to a code
    that is not in ``mapping`` and therefore to a NaN ordering.
    """
    ranked = data.rank(ascending=False, axis=1, method="first").astype(int)
    # Read each row of ranks as the digits of a decimal number, e.g.
    # (4, 1, 2, 3) -> 4123, which is the key format used by ``mapping``.
    place_values = 10 ** numpy.arange(ranked.shape[1] - 1, -1, -1)
    codes = ranked.to_numpy().dot(place_values)
    return pandas.Series(codes, index=data.index).astype(str).map(mapping)

In [4]:
class ElectionSampler:
    """Turn ward demographics into sampled or expected outcomes.

    Every row of the input is given a *case* string (see
    ``decide_case_strings``) and an *ordering* string (from
    ``encoded_rankings``). ``cases`` maps case strings to objects that
    provide ``random_outcomes(ordering)`` and ``expected_value(ordering)``;
    ``threshold`` is the share a group must exceed to be counted in the
    case string.
    """

    def __init__(self, cases, threshold):
        self.cases = cases
        self.threshold = threshold

    def sample(self, data: pandas.DataFrame):
        """Draw a random outcome for every row of ``data``.

        :param pandas.DataFrame data: a DataFrame with the percentages
            of each racial group in a ward.
        :returns: the row-wise result of calling the matching case's
            ``random_outcomes`` with the row's ordering.

        Uses ``progress_apply``, which is only available on pandas objects
        once ``tqdm.pandas()`` has been called.
        """
        def draw(row):
            return self.cases[row["case"]].random_outcomes(row["ordering"])

        return self.orderings_and_cases(data).progress_apply(draw, axis=1)

    def expectation(self, data: pandas.DataFrame):
        """Return the expected outcome averaged over the rows of ``data``.

        Rows are grouped by (ordering, case); each group's expected value is
        weighted by the number of rows in the group, and the weighted sum is
        divided by the total number of rows.
        """
        counts = self.orderings_and_cases(data).groupby(["ordering", "case"]).size()
        weighted = (
            self.cases[case].expected_value(ordering) * count
            for (ordering, case), count in counts.items()
        )
        return pandas.DataFrame.from_records(weighted).sum() / counts.sum()

    def orderings_and_cases(self, data: pandas.DataFrame):
        """Return a frame with the "ordering" and "case" string of each row."""
        orderings = encoded_rankings(data)
        case_strings = self.decide_case_strings(data)
        return pandas.DataFrame({"ordering": orderings, "case": case_strings})

    def decide_case_strings(self, data: pandas.DataFrame):
        """Return the case string of each row, e.g. ``"2M"`` or ``"3P"``.

        The digit is the number of columns strictly above ``self.threshold``;
        the letter is "M" when any column is strictly above 0.5 and "P"
        otherwise.
        """
        over_threshold = data.gt(self.threshold).sum(axis=1)
        letter = data.gt(0.5).any(axis=1).map({True: "M", False: "P"})
        return over_threshold.astype(str) + letter


In [5]:
class Case:
    """A discrete distribution over outcomes for one demographic case.

    :param outcomes: list of tuples; entry ``j`` of a tuple is the number of
        wins for the ``j``-th group of an ordering. Tuples shorter than
        ``number_of_groups`` are padded with zeros, and all tuples must
        have the same total.
    :param probabilities: one probability per outcome; the sum must lie
        between 0.99 and 1.01.
    :param int number_of_groups: number of columns in the outcome table.
    """

    def __init__(self, outcomes, probabilities, number_of_groups=4):
        assert len(outcomes) == len(probabilities)
        assert all(isinstance(x, tuple) for x in outcomes)
        assert 0.99 <= sum(probabilities) <= 1.01
        assert len({sum(outcome) for outcome in outcomes}) == 1

        # One row per outcome, one column per group; unspecified trailing
        # groups keep their initial value of zero.
        table = numpy.zeros((len(outcomes), number_of_groups))
        for row, outcome in enumerate(outcomes):
            for column, wins in enumerate(outcome):
                table[row, column] = wins
        self.outcomes = table

        self.outcome_indices = numpy.arange(len(outcomes))
        self.probabilities = numpy.asarray(probabilities).reshape(len(probabilities))

    def __iter__(self):
        """Iterate over ``(outcomes, probabilities)`` to allow unpacking."""
        pair = (self.outcomes, self.probabilities)
        return iter(pair)

    def random_outcomes(self, ordering, index=None):
        """Draw outcomes at random according to ``self.probabilities``.

        :param ordering: iterable of column labels, one per group.
        :param index: optional index for the result; one outcome is drawn
            per index entry, or a single outcome when ``index`` is None.
        :returns: a DataFrame with one drawn outcome per row.
        """
        size = 1 if index is None else len(index)
        drawn = numpy.random.choice(
            a=self.outcome_indices, p=self.probabilities, size=size
        )
        return pandas.DataFrame(
            self.outcomes[drawn, :], columns=list(ordering), index=index
        )

    def expected_value(self, ordering):
        """Return the probability-weighted mean outcome as a Series.

        The Series is indexed by the labels in ``ordering``.
        """
        weighted = self.outcomes * self.probabilities[:, numpy.newaxis]
        return pandas.DataFrame(weighted, columns=list(ordering)).sum()

In [6]:
# Baseline model for the 50x1 plan (every outcome awards a single seat).
# Keys are the case strings produced by ElectionSampler.decide_case_strings:
# the number of groups above the threshold, then "M" if some group exceeds
# 50% and "P" otherwise. In each outcome tuple, position j is the number of
# seats won by the j-th largest group.
fifty_by_one = ElectionSampler(
    cases={
        "1M": Case(
            outcomes=[(1, 0), (0, 1)],
            probabilities=(0.98, 0.02),
        ),
        "2M": Case(
            outcomes=[(1, 0, 0), (0, 1, 0), (0, 0, 1)],
            probabilities=(0.8, 0.18, 0.02),
        ),
        "1P": Case(
            outcomes=[(1, 0, 0), (0, 1, 0), (0, 0, 1)],
            probabilities=(0.8, 0.18, 0.02),
        ),
        "2P": Case(
            outcomes=[(1, 0, 0), (0, 1, 0), (0, 0, 1)],
            probabilities=(0.60, 0.30, 0.10),
        ),
        "3P": Case(
            outcomes=[(1, 0, 0), (0, 1, 0), (0, 0, 1)],
            probabilities=(0.50, 0.30, 0.20),
        ),
    },
    threshold=0.25,
)

In [7]:
# Baseline model for the 10x5 plan (every outcome awards five seats).
# Keys are case strings: the number of groups above the threshold, then "M"
# if some group exceeds 50% and "P" otherwise. In each outcome tuple,
# position j is the number of seats won by the j-th largest group.
cases10x5 = {
    "1M": Case(
        outcomes=[(5, 0), (4, 1)],
        probabilities=(0.60, 0.40),
    ),
    "2M": Case(
        outcomes=[(4, 1), (3, 2), (2, 3), (3, 1, 1), (2, 2, 1)],
        probabilities=(0.60, 0.30, 0.05, 0.03, 0.02),
    ),
    "3M": Case(
        outcomes=[(3, 1, 1), (2, 2, 1), (2, 1, 2)],
        probabilities=(0.6, 0.3, 0.1),
    ),
    "1P": Case(
        outcomes=[(5, 0), (4, 1), (4, 0, 1), (3, 1, 1)],
        probabilities=(0.5, 0.4, 0.05, 0.05),
    ),
    "2P": Case(
        outcomes=[(4, 1), (3, 2), (2, 3), (3, 1, 1), (2, 2, 1)],
        probabilities=(0.5, 0.35, 0.1, 0.03, 0.02),
    ),
    "3P": Case(
        outcomes=[(3, 1, 1), (2, 2, 1), (2, 1, 2)],
        probabilities=(0.5, 0.35, 0.15),
    ),
    "4P": Case(
        outcomes=[(2, 1, 1, 1), (1, 2, 1, 1), (1, 1, 2, 1)],
        probabilities=(0.5, 0.35, 0.15),
    ),
}

# A group counts toward the case string once its share exceeds 1/6.
ten_by_five = ElectionSampler(cases=cases10x5, threshold=1 / 6)

In [8]:
# Baseline model for the 10x3 plan (every outcome awards three seats).
# Keys are case strings: the number of groups above the threshold, then "M"
# if some group exceeds 50% and "P" otherwise. In each outcome tuple,
# position j is the number of seats won by the j-th largest group.
cases10x3 = {
    "1M": Case(
        outcomes=[(3, 0, 0), (2, 1, 0), (2, 0, 1)],
        probabilities=(0.9, 0.08, 0.02),
    ),
    "2M": Case(
        outcomes=[(3, 0, 0), (2, 1, 0), (1, 2, 0), (1, 1, 1)],
        probabilities=(0.1, 0.7, 0.15, 0.05),
    ),
    "1P": Case(
        outcomes=[(3, 0, 0), (2, 1, 0), (2, 0, 1)],
        probabilities=(0.6, 0.38, 0.02),
    ),
    "2P": Case(
        outcomes=[(3, 0, 0), (2, 1, 0), (1, 2, 0), (1, 1, 1)],
        probabilities=(0.05, 0.65, 0.25, 0.05),
    ),
    "3P": Case(
        outcomes=[(3, 0, 0), (2, 1, 0), (2, 0, 1), (1, 2, 0), (1, 1, 1)],
        probabilities=(0.02, 0.05, 0.02, 0.01, 0.90),
    ),
}

ten_by_three = ElectionSampler(cases=cases10x3, threshold=0.25)

In [9]:
def load_data(filename):
    """Read an ensemble results CSV and return group population shares.

    :param filename: path (or file-like object) accepted by
        ``pandas.read_csv``. The file must have the columns ``step``,
        ``ward``, ``TOTPOP``, ``NH_WHITE``, ``NH_BLACK``, ``HISP`` and
        ``NH_ASIAN``.
    :returns: a DataFrame with columns ``step``, ``ward``, ``White``,
        ``Black``, ``Hispanic`` and ``Asian``, where each group column is
        that group's count divided by ``TOTPOP``.
    """
    raw = pandas.read_csv(filename)
    columns = {"step": raw["step"], "ward": raw["ward"]}
    count_columns = (
        ("White", "NH_WHITE"),
        ("Black", "NH_BLACK"),
        ("Hispanic", "HISP"),
        ("Asian", "NH_ASIAN"),
    )
    for group, source in count_columns:
        columns[group] = raw[source] / raw["TOTPOP"]
    return pandas.DataFrame(columns)

## Expectations

In [10]:
# Group shares per (step, ward) row from the prec50 ensemble results; this is
# the input for the "50x1" plan.
data50x1 = load_data("../ensembles/prec50/results.csv")

In [14]:
# NOTE(review): this cell used to reset ``data50x1 = None``. Run top to
# bottom, that left ``data["50x1"]`` as None, so computing the "50x1"
# expectations failed with "'NoneType' object is not subscriptable". The
# reset is dropped; release the frame only after the last cell that reads
# ``data["50x1"]``.

In [10]:
# Group shares per (step, ward) row from the prec10 ensemble results; shared
# by the "10x5" and "10x3" plans.
data10xM = load_data("../ensembles/prec10/results.csv")

In [11]:
# Group shares per (step, ward) row from the ca10 ensemble results; shared by
# the "10x5_CA" and "10x3_CA" plans.
data10xM_CA = load_data("../ensembles/ca10/results.csv")

In [12]:
# Sampler to use for each plan key. The "_CA" plans reuse the sampler of the
# corresponding base plan. The key order here is the column order of the
# ``expectations`` frame built from this dict.
samplers = {
    "50x1": fifty_by_one,
    "10x5": ten_by_five,
    "10x5_CA": ten_by_five,
    "10x3": ten_by_three,
    "10x3_CA": ten_by_three
}

In [15]:
# Input frame for each plan key (same keys as ``samplers``). The 10x5 and
# 10x3 plans share one ensemble, as do their "_CA" counterparts.
data = {
    "50x1": data50x1,
    "10x5": data10xM,
    "10x5_CA": data10xM_CA,
    "10x3": data10xM,
    "10x3_CA": data10xM_CA
}

In [16]:
# Total number of seats under each plan.
number_of_seats = {
    **dict.fromkeys(("50x1", "10x5", "10x5_CA"), 50),
    **dict.fromkeys(("10x3", "10x3_CA"), 30),
}

# Number of wards under each plan; used to scale per-ward expectations up to
# whole-plan totals.
number_of_wards = {
    "50x1": 50,
    **dict.fromkeys(("10x5", "10x5_CA", "10x3", "10x3_CA"), 10),
}

In [16]:
# Expected wins per group under each plan: the per-ward expectation over all
# rows of the plan's data, scaled by the plan's number of wards. One column
# per plan key.
expectations = pandas.DataFrame(
    {
        plan: sampler.expectation(data[plan][groups]) * number_of_wards[plan]
        for plan, sampler in samplers.items()
    }
)

In [17]:
# Write the baseline expectations next to the notebook.
expectations.to_csv("./expectations.csv")

## Sensitivity analysis

In [18]:
# Alternative probability assignments for the 50x1 plan, keyed by case
# string. Each list entry is swapped in for the baseline Case of that key,
# one at a time, by alternate_samplers().
alternate_50x1_cases = {
    "1M": [
        Case([(1, 0), (0, 1)], (0.90, 0.10)),
    ],
    "2M": [
        Case([(1, 0, 0), (0, 1, 0), (0, 0, 1)], (0.7, 0.30, 0.00)),
        Case([(1, 0, 0), (0, 1, 0), (0, 0, 1)], (0.7, 0.20, 0.10)),
        Case([(1, 0, 0), (0, 1, 0), (0, 0, 1)], (0.9, 0.10, 0.00)),
    ],
    "1P": [
        Case([(1, 0, 0), (0, 1, 0), (0, 0, 1)], (0.7, 0.30, 0.00)),
        Case([(1, 0, 0), (0, 1, 0), (0, 0, 1)], (0.7, 0.20, 0.10)),
        Case([(1, 0, 0), (0, 1, 0), (0, 0, 1)], (0.9, 0.10, 0.00)),
    ],
    "2P": [
        Case([(1, 0, 0), (0, 1, 0), (0, 0, 1)], (0.60, 0.40, 0.00)),
        Case([(1, 0, 0), (0, 1, 0), (0, 0, 1)], (0.60, 0.30, 0.10)),
    ],
    "3P": [
        Case([(1, 0, 0), (0, 1, 0), (0, 0, 1)], (0.60, 0.30, 0.10)),
        Case([(1, 0, 0), (0, 1, 0), (0, 0, 1)], (0.40, 0.30, 0.30)),
    ],
}

In [19]:
# Alternative probability assignments for the 10x5 plan, keyed by case
# string. Each list entry is swapped in for the baseline Case of that key,
# one at a time, by alternate_samplers().
alternate_10x5_cases = {
    "1M": [
        Case([(5, 0), (4, 1)], (0.50, 0.50)),
        Case([(5, 0), (4, 1)], (0.80, 0.20)),
    ],
    "2M": [
        Case([(4, 1), (3, 2), (2, 3)], (0.60, 0.30, 0.10)),
        Case([(4, 1), (3, 2), (2, 3)], (0.50, 0.45, 0.05)),
    ],
    "3M": [
        Case([(3, 1, 1), (2, 2, 1), (2, 1, 2)], (0.5, 0.3, 0.2)),
        Case(
            [(3, 1, 1), (3, 2), (2, 3), (2, 2, 1), (2, 1, 2)],
            (0.25, 0.25, 0.15, 0.2, 0.15),
        ),
    ],
    "1P": [
        Case([(5, 0), (4, 1)], (0.50, 0.50)),
        Case([(5, 0), (4, 1)], (0.80, 0.20)),
    ],
    "2P": [
        Case([(4, 1), (3, 2), (2, 3)], (0.5, 0.35, 0.15)),
        Case([(4, 1), (3, 2), (2, 3)], (0.40, 0.50, 0.10)),
    ],
    "3P": [
        Case([(3, 1, 1), (2, 2, 1), (2, 1, 2)], (0.5, 0.30, 0.20)),
        Case(
            [(3, 1, 1), (3, 2), (2, 3), (2, 2, 1), (2, 1, 2)],
            (0.25, 0.25, 0.15, 0.2, 0.15),
        ),
    ],
    "4P": [
        Case([(2, 1, 1, 1), (1, 2, 1, 1), (1, 1, 2, 1)], (0.80, 0.10, 0.10)),
        # NOTE(review): this outcome totals 4 seats, while every other 10x5
        # outcome totals 5 -- confirm the fifth seat is meant to be left
        # unassigned.
        Case([(1, 1, 1, 1)], (1,)),
    ],
}

In [20]:
# Alternative probability assignments for the 10x3 plan, keyed by case
# string. Each list entry is swapped in for the baseline Case of that key,
# one at a time, by alternate_samplers().
alternate_10x3_cases = {
    "1M": [
        Case([(3, 0, 0), (2, 1, 0)], (0.9, 0.10)),
        Case([(3, 0, 0), (2, 1, 0)], (0.6, 0.40)),
    ],
    "2M": [
        Case([(3, 0, 0), (2, 1, 0), (1, 2, 0), (1, 1, 1)], (0.3, 0.4, 0.2, 0.1)),
        Case([(3, 0, 0), (2, 1, 0), (1, 2, 0)], (0.05, 0.85, 0.1)),
    ],
    "1P": [
        Case([(3, 0, 0), (2, 1, 0)], (0.6, 0.40)),
        Case([(3, 0, 0), (2, 1, 0), (2, 0, 1)], (0.7, 0.20, 0.10)),
    ],
    "2P": [
        Case(
            [(3, 0, 0), (2, 1, 0), (1, 2, 0), (1, 1, 1)],
            (0.20, 0.45, 0.25, 0.10),
        ),
        Case([(3, 0, 0), (2, 1, 0), (1, 2, 0)], (0.05, 0.65, 0.30)),
    ],
    "3P": [
        Case([(1, 1, 1)], (1,)),
        Case(
            [(3, 0, 0), (2, 1, 0), (2, 0, 1), (1, 1, 1)],
            (0.10, 0.05, 0.05, 0.80),
        ),
    ],
}


In [21]:
# Alternative Case lists to try for each plan key in the sensitivity
# analysis. The "_CA" plans reuse the tables of their base plan. The key
# order here is the order in which alternate_samplers() yields its samplers.
alternates = {
    "50x1": alternate_50x1_cases,
    "10x5": alternate_10x5_cases,
    "10x3": alternate_10x3_cases,
    "10x3_CA": alternate_10x3_cases,
    "10x5_CA": alternate_10x5_cases
}

In [22]:
def alternate_samplers():
    """Yield one sampler per alternative Case listed in ``alternates``.

    Each yielded sampler matches the baseline sampler of its plan, except
    that the Case for a single case string is replaced by one alternative.

    :returns: a generator of ``(key, original, sampler)`` tuples, where
        ``original`` is the plan key and ``key`` is
        ``"<original>_<case string>_<position in the alternatives list>"``.
    """
    for original, alternate_cases in alternates.items():
        baseline = samplers[original]
        for case_str, candidates in alternate_cases.items():
            for position, candidate in enumerate(candidates):
                # Copy the baseline cases, overriding just this case string.
                swapped = {**baseline.cases, case_str: candidate}
                yield (
                    "{}_{}_{}".format(original, case_str, position),
                    original,
                    ElectionSampler(swapped, baseline.threshold),
                )

In [24]:
# Expected wins per group for every alternative sampler, scaled by the ward
# count of the plan it was derived from. One column per sampler key.
alternate_expectations = pandas.DataFrame(
    {
        label: variant.expectation(data[plan][groups]) * number_of_wards[plan]
        for label, plan, variant in alternate_samplers()
    }
)

In [25]:
# Write the sensitivity-analysis expectations next to the notebook.
alternate_expectations.to_csv("./alternates.csv")