In [2]:
import tensorflow as tf
from elections.Ballot import Ballot
from elections.DefaultConfigOptions import *
from elections.ElectionConstructor import ElectionConstructor, construct_irv, construct_h2h
from elections.NDPopulation import NDPopulation
from network.Tensor import Tensor
from network.ElectionModel import ElectionModel, ElectionModelTrainer
import pickle
import matplotlib.pyplot as plt

In [3]:
n_bins = 21
max_ideology = 1.5
min_ideology = -1.5
ideology_range = max_ideology - min_ideology
ideology_dim = 1
n_voters = 100

In [4]:
def create_model_and_population(ideology_bins: int, ideology_dim: int) -> (ElectionModel, NDPopulation):
    ideology_dim = 1
    ideology_bins = 32
    hidden_ratio = 4

    model = ElectionModel(n_bins, 256, 4)

    population_means = np.zeros(shape=(ideology_dim,))
    population_stddev = np.ones(shape=(ideology_dim,))
    pop = NDPopulation(population_means, population_stddev)
    return model, pop

In [5]:
def gen_random_candidates(population: NDPopulation, n: int)-> List[Candidate]:
    candidates = []
    while len(candidates) < n:
        ivec = population.unit_sample_voter().ideology.vec * .5
        if min_ideology < ivec[0] < max_ideology:
            candidates.append(Candidate(f"c-{len(candidates)}", Independents, Ideology(ivec), 0))

    return candidates

def run_sample_election(process: ElectionConstructor, population: NDPopulation, n_voters: int):
    candidates = gen_random_candidates(population, 5)
    voters = population.generate_unit_voters(n_voters)
    ballots = [Ballot(v, candidates, unit_election_config) for v in voters]
    result = process.run(ballots, set(candidates))
    winner = result.winner()
    return candidates, winner

def convert_ideology_to_bin(ideology: float) -> int:
    ideology = max(min_ideology, min(ideology, max_ideology))
    pct = (ideology - min_ideology) / ideology_range
    return int(pct * n_bins)

# note that the return value is the index of the winning candidate and NOT the
# bin of the winning candidate.
def create_training_sample(candidates: List[Candidate], winner: Candidate) -> (int, list[int]):
    w = candidates.index(winner)
    cc = [convert_ideology_to_bin(c.ideology.vec[0]) for c in candidates]
    return cc, w

In [6]:
import tensorflow as tf
import numpy as np

class ElectionMemory:
    def __init__(self, max_size: int, n_bins: int):
        self.max_size = max_size
        self.count = 0
        self.x: np.array = np.zeros(shape=(max_size, n_bins), dtype=np.single)
        self.mask: np.array = np.zeros(shape=(max_size, n_bins), dtype=np.single)
        self.y: np.array = np.zeros(shape=(max_size, n_bins), dtype=np.single)
        self.n_bins = n_bins


    # have to deal with the possibility that there will be two candidates
    # with same 'bin' and that one of them will have won.  Thus, we only
    # set the y to 1 for the candidate that won, a nearly identical example
    # will be created for the other candidate where y is 0.
    def add_sample(self, candidates: List[int], winner_index: int):
        for i in range(len(candidates)):
            x = np.zeros(shape=(1,self.n_bins), dtype=np.single)
            y = np.zeros(shape=(1, self.n_bins), dtype=np.single)
            mask = np.zeros(shape=(1,self.n_bins), dtype=np.single)
            for j in range(len(candidates)):
                if i != j:
                    x[0, candidates[j]] = 1

            if i == winner_index:
                y[0, candidates[winner_index]] = 1
            mask[0, candidates[i]] = 1

            self.add_sample_np(x, mask, y)

    def add_sample_np(self, x: np.ndarray, mask: np.ndarray, y: np.ndarray):
        sr = self.count % self.max_size
        self.count += x.shape[0]

        er = sr + x.shape[0]
        self.x[sr:er] = x
        self.mask[sr:er] = mask
        self.y[sr:er] = y


    def get_batch(self, batch_size) -> (np.ndarray, np.ndarray, np.ndarray):
        indices = np.random.randint(0, min(self.max_size, self.count), batch_size)
        return tf.gather(self.x, indices), tf.gather(self.mask, indices), tf.gather(self.y, indices)





In [7]:
def populate_dummy_memory() -> ElectionMemory:
    m = ElectionMemory(100000, n_bins)
    n_candidates = 5
    for i in range(100000):
        cc = [random.randrange(0, n_bins) for _ in range(n_candidates)]
        w = random.randrange(0, 5)
        m.add_sample(cc, w)

    return m

def test_election_memory():
    m = populate_dummy_memory()
    x, a, y = m.get_batch(4)
    print("x = ", x)
    print("a = ", a)
    print("y = ", y)

    print("done")
# test_election_memory()

In [8]:
class LossTracker():
    def __init__(self, count):
        self.count = count
        self.losses = np.zeros(shape=(count))
        self.idx = 0
        self.sum = 0

    def add_loss(self, l) -> float:
        idx = self.idx % self.count
        if self.idx >= self.count:
            self.sum -= self.losses[idx]
        self.sum += l
        self.losses[idx] = l
        self.idx += 1
        return self.sum / min(self.count, self.idx)

In [9]:
def populate_memory(count: int) -> ElectionMemory:
    m = ElectionMemory(count * 5, n_bins)
    population_means = np.zeros(shape=(ideology_dim,))
    population_stddev = np.ones(shape=(ideology_dim,))
    population = NDPopulation(population_means, population_stddev)
    process = ElectionConstructor(construct_irv, "IRV")
    print(f"populating training memory with {count * 5} samples")
    for i in range(count):
        c, w = run_sample_election(process, population, n_voters)
        ci, wi = create_training_sample(c, w)
        m.add_sample(ci, wi)
        if i % 100 == 0:
            print(".", end='')
    print(f"\nm.count {m.count}")
    return m

In [15]:
def train_network(net: ElectionModel,  memory: ElectionMemory, n_batches: int, batch_size: int):
    print(f"training network for {n_batches} epochs")
    tracker = LossTracker(1000)
    trainer = ElectionModelTrainer(net)
    current_path = ""
    average_loss = 0

    for i in range(n_batches):
        x, a, y = memory.get_batch(batch_size)
        loss = trainer.update(x, a, y)
        if np.isnan(loss):
            print(f"loss is nan, reverting to {current_path}")
            net = tf.keras.models.load_model(current_path)
            trainer = ElectionModelTrainer(net)
        else:
            average_loss = tracker.add_loss(loss)
            if i % 1000 == 0:
                print(f"epoch {i:5d} loss = {average_loss:.6}")
                current_path = f"mdl.sav.{i}"
                net.save(current_path, overwrite=True)

    return net, average_loss

In [16]:
def save_model_test():
    election_memory = populate_memory(1000)
    network = ElectionModel(n_bins, 256, 4)
    model_trainer = ElectionModelTrainer(network)
    train_network(network, election_memory, 100, 32)

    print("saving network")
    network.save("cm.sav", overwrite=True)
    print("loading n1")
    n1: ElectionModel = tf.keras.models.load_model("cm.sav")
    print("saving n1")
    n1.save("cm1.sav", overwrite=True)

    x, mask, y = election_memory.get_batch(1)

    print("network(x)")
    print(network(x))
    print("n1(x)")
    print(n1(x))
    print("bins: ", n1.n_bins)

save_model_test()

populating training memory with 5000 samples
..........
m.count 5000
training network for 100 epochs
epoch     0 loss = 0.0424696
saving network
loading n1
saving n1
network(x)
tf.Tensor(
[[0.00063517 0.00029125 0.00029273 0.0003061  0.00221351 0.01348361
  0.02518798 0.02806755 0.21195006 0.10257724 0.19252694 0.11366741
  0.14490388 0.08602922 0.05455501 0.01734561 0.00214756 0.0026406
  0.00029359 0.00038627 0.00049872]], shape=(1, 21), dtype=float32)
n1(x)
tf.Tensor(
[[0.00063517 0.00029125 0.00029273 0.0003061  0.00221351 0.01348361
  0.02518798 0.02806755 0.21195006 0.10257724 0.19252694 0.11366741
  0.14490388 0.08602922 0.05455501 0.01734561 0.00214756 0.0026406
  0.00029359 0.00038627 0.00049872]], shape=(1, 21), dtype=float32)




AttributeError: 'ElectionModel' object has no attribute 'n_bins'

In [12]:
election_memory = populate_memory(50000)

populating training memory with 250000 samples
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
m.count 250000


In [13]:
def grid_search():
    tf.get_logger().setLevel('WARN')
    for batch_size in [2048]:
        for width in [256, 512]:
            for layers in [3, 4, 5]:
                network = ElectionModel(n_bins, width, layers)
                network, loss = train_network(network, election_memory, 10000, batch_size)
                print(f"batch_size {batch_size:4d} width {width:4d} layers {layers} loss {loss:.6}")
                network.save(f"networks/net.{batch_size}.{width}.{layers}.mdl")

grid_search()

training network for 10000 epochs
epoch     0 loss = 0.0288365
epoch   100 loss = 0.0243461
epoch   200 loss = 0.0234561
epoch   300 loss = 0.0231654
epoch   400 loss = 0.0229549
epoch   500 loss = 0.0228417
epoch   600 loss = 0.0227424
epoch   700 loss = 0.0226855
epoch   800 loss = 0.0226378
epoch   900 loss = 0.0225883
epoch  1000 loss = 0.0225478
epoch  1100 loss = 0.0222942
epoch  1200 loss = 0.0222325
epoch  1300 loss = 0.0221918
epoch  1400 loss = 0.0221724
epoch  1500 loss = 0.0221245
epoch  1600 loss = 0.0220972
epoch  1700 loss = 0.0220511
epoch  1800 loss = 0.0220135
epoch  1900 loss = 0.0219908
epoch  2000 loss = 0.0219593
epoch  2100 loss = 0.021977
epoch  2200 loss = 0.0219647
epoch  2300 loss = 0.0219248
epoch  2400 loss = 0.0218868
epoch  2500 loss = 0.0218579
epoch  2600 loss = 0.0218372
epoch  2700 loss = 0.0218024
epoch  2800 loss = 0.0217642
epoch  2900 loss = 0.0217346
epoch  3000 loss = 0.0216962
epoch  3100 loss = 0.0216548
epoch  3200 loss = 0.0216275
epoch  330

2021-07-23 17:27:49.389010: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-07-23 17:27:49.798779: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
2021-07-23 17:31:55.573617: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)


In [14]:
def foo():
    pass
    # x, mask, y = election_memory.get_batch(1)
    # o = network(x)
    # print(x)
    #
    # xx = np.arange(0, n_bins)
    # plt.plot(o.numpy()[0])
    #
    # plt.scatter(xx, x.numpy()[0])

