In [None]:

import datetime as datetime
import os.path
import pickle
from typing import List

import numpy as np
import tensorflow as tf

from ActionMemory import ActionMemory
from Ballot import Ballot
from Candidate import Candidate
from DefaultConfigOptions import *
from ElectionConstructor import ElectionConstructor, construct_irv, construct_h2h
from Ideology import Ideology
from ModelStats import ModelStats
from NDPopulation import NDPopulation
from ProcessResult import ProcessResult
from Tensor import Tensor
from Timings import Timings


class ACCandidateNetwork(tf.keras.Model):
    """Actor-critic network that proposes an ideology and estimates its value.

    The observations of the opponents are encoded one by one, summed into a
    single latent state, and then fed to two heads:

    * the actor head maps the latent state to an action of width
      ``ideology_dim``;
    * the critic head maps the latent state concatenated with that action to
      a single scalar value.

    Args:
        ideology_dim: width of the action produced by the actor head.
        n_latent: width of the summed latent state.
        width: number of units in every hidden layer.
        learn_rate: learning rate of the Adam optimizer used by ``update``.
    """

    def __init__(self, ideology_dim: int, n_latent: int, width: int, learn_rate: float = 0.001):
        super().__init__()
        self.input_width = ideology_dim
        self.output_width = ideology_dim
        self.n_latent = n_latent

        # Layers are constructed in the same order as before (encoder, state,
        # critic stack, actor stack, dropout, heads) so that the automatically
        # generated Keras layer names do not change.
        self.encoding_layers = [tf.keras.layers.Dense(width, activation='relu') for _ in range(3)]

        self.state = tf.keras.layers.Dense(self.n_latent)

        self.critic_layers = [tf.keras.layers.Dense(width, activation='relu') for _ in range(3)]
        self.actor_layers = [tf.keras.layers.Dense(width, activation='relu') for _ in range(3)]

        self.dropout = tf.keras.layers.Dropout(.3)
        self.actions = tf.keras.layers.Dense(ideology_dim)
        self.critic = tf.keras.layers.Dense(1)

        # Optimizer used by update(); it was referenced there but never created.
        self.adam = tf.keras.optimizers.Adam(learning_rate=learn_rate)

    # input is a tensor of shape (batch_size, n_observations (n_candidates), input_dim)
    def call(self, input: Tensor, training: bool = None, mask: bool = None) -> "tuple[Tensor, Tensor]":
        """Run the network and return ``(actions, value)``.

        Args:
            input: observations, one row per opponent on axis 1.  Axis 1 may
                have length zero, meaning no candidates are in the race yet.
            training: forwarded to the dropout layer.
            mask: unused; present to match the Keras ``call`` signature.

        Returns:
            A pair ``(actions, value)``: the actor head output and the critic
            head output for that action.
        """
        if input.shape[1] != 0:
            x = input
            for e in self.encoding_layers:
                x = self.dropout(e(x), training=training)
            # reduce to state observations
            encoded_observations = self.dropout(self.state(x), training=training)
            # now, sum the observations (which have been put on dim 1)
            encoded_state = tf.reduce_sum(encoded_observations, axis=1, keepdims=False)
        else:
            # this corresponds to no candidates in the race yet.
            batch_size = input.shape[0]
            encoded_state = tf.zeros(shape=(batch_size, self.n_latent), dtype=tf.dtypes.float32)

        # actor: latent state -> action
        x = encoded_state
        for a in self.actor_layers:
            x = self.dropout(a(x), training=training)
        # The output layer is stored as `self.actions`; `self.actor` never existed.
        actions = self.actions(x)

        # critic: (latent state, action) -> value
        x = tf.concat([encoded_state, actions], axis=1)
        for c in self.critic_layers:
            x = self.dropout(c(x), training=training)
        value = self.critic(x)

        return actions, value

    def filter_grads(self, grads):
        """Pair gradients with ``self.trainable_variables``, dropping ``None`` gradients.

        Args:
            grads: gradients in the same order as ``self.trainable_variables``.

        Returns:
            A list of ``(gradient, variable)`` tuples.
        """
        return [(g, v) for g, v in zip(grads, self.trainable_variables) if g is not None]

    def update(self, state: Tensor, reward: Tensor):
        """Apply one combined actor/critic gradient step.

        The critic gradient comes from the mean squared error between the
        predicted value and ``reward``.  The actor gradient back-propagates
        ``-d(value)/d(action)`` through the action, which pushes the action
        towards a higher predicted value.  Both gradients are summed per
        variable and applied with ``self.adam``.

        Args:
            state: observations in the layout accepted by ``call``.
            reward: target for the critic output.
        """
        # Three tapes record the same forward pass; each is consumed by exactly
        # one gradient() call below.
        with tf.GradientTape() as t1, tf.GradientTape() as t2, tf.GradientTape() as t3:
            action, value = self.call(state, training=True, mask=None)
            loss = tf.reduce_mean(tf.square(value - reward))

        critic_grad_vars = self.filter_grads(t1.gradient(loss, self.trainable_variables))
        dvda = t2.gradient(value, action)
        # -dvda is passed as output_gradients: chain rule through the action.
        actor_grad_vars = self.filter_grads(t3.gradient(action, self.trainable_variables, -dvda))

        # Sum the two gradients per variable, keyed by the hashable variable ref.
        merged = {}
        for g, v in critic_grad_vars:
            merged[v.ref()] = (g, v)
        for g, v in actor_grad_vars:
            key = v.ref()
            if key in merged:
                merged[key] = (merged[key][0] + g, v)
            else:
                # A variable that only received an actor gradient is kept
                # instead of raising KeyError.
                merged[key] = (g, v)

        self.adam.apply_gradients(list(merged.values()))

class ACCandidateModel:
    """Training wrapper around ``ACCandidateNetwork``.

    Owns the network, an Adam optimizer, an ``ActionMemory`` of samples and a
    TensorBoard summary writer, and converts between ``Ideology`` objects and
    the tensors that the network consumes.

    NOTE(review): ``update`` and ``choose_ideology`` treat the network output
    as one tensor of width ``ideology_bins * ideology_dim``, but
    ``ACCandidateNetwork.call`` returns an ``(actions, value)`` pair whose
    action head has width ``ideology_dim``.  These two methods need to be
    reconciled with the network before this class can train or act.

    Args:
        ideology_bins: number of discrete bins per ideology dimension.
        ideology_dim: number of ideology dimensions.
        n_hidden: width of the hidden layers of the network.
        n_latent: width of the latent state of the network.
        learn_rate: learning rate of the Adam optimizer.
    """

    def __init__(self,
                 ideology_bins: int,
                 ideology_dim: int,
                 n_hidden: int,
                 n_latent: int,
                 learn_rate: float):
        super().__init__()
        self.ideology_bins = ideology_bins
        self.ideology_dim = ideology_dim
        self.n_hidden = n_hidden
        self.n_latent = n_latent
        self.learn_rate = learn_rate
        self.model = ACCandidateNetwork(ideology_dim=ideology_dim,
                                        n_latent=n_latent,
                                        width=n_hidden)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate)
        self.global_step = 0
        # NOTE(review): __setstate__ rebuilds the memory with 100 * 1000 as the
        # first argument instead of 1024 -- confirm which value is intended.
        self.memory = ActionMemory(1024, ideology_dim, ideology_dim)
        self.action_width = self.ideology_bins * self.ideology_dim
        self.ideology_range = 6

        # this is the dimension of the input vector for a single opponent.  It can be the same as ideology_dim, or
        # it could be ideology_dim * ideology_bins for a one_hot representation of ideology
        self.input_width = ideology_bins * ideology_dim

        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        self.log_dir = 'logs/' + current_time + '/train'
        self.summary_writer = tf.summary.create_file_writer(self.log_dir)
        self.model_path = ""

    def save_to_file(self, path: str):
        """Save the network to ``path + ".model"`` and pickle the wrapper to ``path``."""
        self.model_path = path + ".model"
        self.model.save(self.model_path)
        with open(path, "wb") as f:
            pickle.dump(self, f)

    def __getstate__(self):
        """Return the picklable state: everything except the TensorFlow-backed members."""
        state = self.__dict__.copy()
        # Don't pickle the model, memory, optimizer or writer; __setstate__
        # recreates them.
        del state["model"]
        del state["memory"]
        del state["optimizer"]
        del state["summary_writer"]
        return state

    def __setstate__(self, state):
        """Restore pickled attributes and rebuild the members dropped by ``__getstate__``.

        The network is reloaded from ``self.model_path``; the memory and the
        optimizer start out fresh.
        """
        self.__dict__.update(state)
        self.memory = ActionMemory(100 * 1000, self.ideology_dim, self.ideology_dim)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learn_rate)
        self.model = tf.keras.models.load_model(self.model_path)
        self.summary_writer = tf.summary.create_file_writer(self.log_dir)

    def ready(self) -> bool:
        """Return whether the memory reports that it is ready."""
        return self.memory.ready()

    def train(self, batch_size: int):
        """Run one ``update`` per depth reported by the memory, then advance ``global_step``."""
        for depth in self.memory.depths():
            state, action, reward = self.memory.get_batch(depth, batch_size)
            self.update(state, action, reward)
        self.global_step += 1

    def update(self, input_batch: np.ndarray, actions: np.ndarray, reward: np.ndarray):
        """Apply one gradient step on a batch and log the loss.

        The loss is the squared error between the network output and the
        reward, masked so that only the outputs of the bins that were actually
        chosen contribute.

        Args:
            input_batch: batch of states; axis 0 is the batch axis.
            actions: integer bin indices, one per ideology dimension and sample.
            reward: reward per sample, broadcast against the output width.
        """
        batch_size = input_batch.shape[0]
        one_hot = tf.one_hot(actions, depth=self.ideology_bins)
        # flatten the one_hot array out to match the output of the network
        # each row will have 'ideology_dim' hot elements, one in each chunk of 'ideology_bins'
        one_hot = tf.reshape(one_hot, shape=(batch_size, self.action_width))
        with tf.GradientTape() as tape:
            # NOTE(review): ACCandidateNetwork returns (actions, value), so `y`
            # is a tuple here and the subtraction below will not work until
            # this method is reconciled with the network.
            y = self.model(input_batch, training=True)
            rewards = tf.ones(shape=(batch_size, self.action_width)) * reward
            deltas = tf.square(y - rewards) * one_hot
            loss = tf.reduce_sum(deltas)

        with self.summary_writer.as_default():
            tf.summary.scalar('loss', loss, step=self.global_step)

        # Differentiate with respect to the same list the gradients are zipped
        # with, so gradient/variable pairs can never be misaligned.
        trainable = self.model.trainable_variables
        grads = tape.gradient(loss, trainable)
        filtered_grad_vars = [(grad, var) for (grad, var) in zip(grads, trainable) if grad is not None]
        # The second positional parameter of apply_gradients is not a step
        # counter in TF2 Keras optimizers, so global_step is no longer passed.
        self.optimizer.apply_gradients(filtered_grad_vars)

    def convert_ideology_to_input(self, ideology: Ideology) -> Tensor:
        """Return the network input for one ideology (currently the one-hot form)."""
        return self.convert_ideology_to_input_onehot(ideology)

    def convert_ideology_to_input_vec(self, ideology: Ideology) -> Tensor:
        """Return the raw ideology vector as float32."""
        return ideology.vec.astype(dtype=np.float32)

    def convert_ideology_to_input_onehot(self, ideology: Ideology) -> Tensor:
        """Return the ideology as a flat one-hot vector of length ``input_width``.

        Each coordinate is mapped from ``[-ideology_range / 2, ideology_range / 2)``
        onto a bin index in ``[0, ideology_bins)``.
        """
        float_vec = (ideology.vec / self.ideology_range + .5) * self.ideology_bins
        # NOTE(review): coordinates outside the range produce indices outside
        # [0, ideology_bins); tf.one_hot presumably encodes those as all
        # zeros -- confirm that is acceptable.
        one_hot = tf.one_hot(tf.cast(float_vec, tf.dtypes.int32), depth=self.ideology_bins)
        # shape must be a sequence; `(self.input_width)` was a bare int.
        return tf.reshape(one_hot, shape=(self.input_width,))

    def convert_ideology_to_int(self, ideology: float):
        """Map one ideology coordinate to its integer bin index."""
        return int((ideology + self.ideology_range / 2) / self.ideology_range * self.ideology_bins)

    # the action vector is a vector of integers corresponding to the actions
    # taken where each action is a location on the i'th dimension of the
    # ideological spectrum
    # i.e.  an ideology of [0,0,0] maps to the middle bin on every dimension,
    # e.g. [100, 100, 100] when ideology_bins is 200
    #
    def convert_ideology_to_action_vec(self, ideology: Ideology) -> Tensor:
        """Return the int32 bin index of every coordinate of ``ideology``."""
        ii = [self.convert_ideology_to_int(i) for i in ideology.vec]
        return tf.constant(ii, dtype=tf.dtypes.int32)

    def get_state_from_opponents(self, opponents: List[Candidate]) -> Tensor:
        """Stack the opponents' inputs into a state with a leading batch axis of 1.

        With no opponents the observation axis has length zero.
        """
        # shape is (observation_id, ideology_representation_vec)
        if len(opponents) != 0:
            candidate_observations = [self.convert_ideology_to_input(o.ideology) for o in opponents]
            state = np.stack(candidate_observations)
        else:
            state = tf.zeros(shape=(0, self.input_width), dtype=tf.dtypes.float32)

        return tf.expand_dims(state, 0)

    def add_sample_from_candidates(self, candidate: Candidate, opponents: List[Candidate], winner: Candidate):
        """Store one (state, action, reward) sample in the memory.

        The state is built from ``opponents``, the action from the candidate's
        ideology, and the reward is 1 when ``candidate`` is the winner, else 0.
        """
        state = self.get_state_from_opponents(opponents)

        action = self.convert_ideology_to_action_vec(candidate.ideology)
        action = tf.expand_dims(action, 0)

        if winner == candidate:
            reward = tf.ones(shape=(1, 1), dtype=tf.dtypes.float32)
        else:
            reward = tf.zeros(shape=(1, 1), dtype=tf.dtypes.float32)

        self.memory.add_sample(state, action, reward)

    def choose_ideology(self, opponents: List[Candidate]) -> np.ndarray:
        """Pick an ideology vector given the opponents already in the race.

        The highest-scoring bin on each dimension is converted back to a
        coordinate in ``[-ideology_range / 2, ideology_range / 2)``.

        Returns:
            A NumPy array with one coordinate per ideology dimension.
        """
        state = self.get_state_from_opponents(opponents)
        # NOTE(review): the network is called with training=True, so dropout is
        # active while acting -- confirm this is intended.  It also returns
        # (actions, value), so the reshape below needs to be reconciled with
        # the network output.
        ideology_pred = self.model.call(state, training=True)
        ideology_hot = tf.reshape(ideology_pred, shape=(self.ideology_dim, self.ideology_bins))
        ideology_indices = tf.cast(tf.argmax(ideology_hot, axis=1), tf.dtypes.float32)
        ideology_vec = (ideology_indices / self.ideology_bins - .5) * self.ideology_range

        return ideology_vec.numpy()

In [None]:
def create_model_and_population(ideology_bins: int, ideology_dim: int) -> (CandidateModel, NDPopulation):
    """Build an untrained candidate model and a unit-normal voter population.

    The hidden width is ``4 * ideology_bins * ideology_dim`` and the latent
    width is ``ideology_bins * ideology_dim``.  The population has mean 0 and
    standard deviation 1 on every ideology dimension.

    Args:
        ideology_bins: number of discrete bins per ideology dimension.
        ideology_dim: number of ideology dimensions.

    Returns:
        A ``(model, population)`` tuple.
    """
    # NOTE(review): `CandidateModel` is not defined in the visible part of this
    # file (the class defined here is `ACCandidateModel`); presumably it comes
    # from the star import or an earlier cell -- confirm.

    # `ideology_bins` used to be overwritten with 64 here, so every caller got
    # a 64-bin model regardless of the value passed in.
    hidden_ratio = 4
    n_hidden = hidden_ratio * ideology_bins * ideology_dim
    n_latent = ideology_bins * ideology_dim
    learn_rate = .001

    model = CandidateModel(ideology_bins=ideology_bins,
                           ideology_dim=ideology_dim,
                           n_hidden=n_hidden,
                           n_latent=n_latent,
                           learn_rate=learn_rate)

    population_means = np.zeros(shape=(ideology_dim,))
    population_stddev = np.ones(shape=(ideology_dim,))
    pop = NDPopulation(population_means, population_stddev)
    return model, pop

In [None]:
def measure_representation(candidate: Candidate, voters: List[Voter]) -> float:
    """Measure how evenly a candidate splits the voters on each ideology dimension.

    For every dimension the share of voters strictly below the candidate is
    computed; the balance of that dimension is the smaller of that share and
    its complement, so 0.5 means the candidate sits at the median and 0.0
    means every voter is on one side.

    Args:
        candidate: object whose ``ideology.vec`` is indexed per dimension.
        voters: objects exposing ``ideology.vec`` with the same layout.

    Returns:
        The mean balance over all dimensions, in ``[0, 0.5]``.

    Raises:
        ZeroDivisionError: if ``voters`` is empty and the candidate has at
            least one dimension.
    """
    n_voters = len(voters)
    position = candidate.ideology.vec
    balance = []
    # Iterate over the indices of the vector itself.  The previous loop
    # iterated `candidate.ideology.dim` and used the items as indices, which
    # fails when `dim` is a plain dimension count.
    for d in range(len(position)):
        share_below = sum(1 for v in voters if v.ideology.vec[d] < position[d]) / n_voters
        balance.append(min(share_below, 1 - share_below))
    return float(np.mean(balance))

In [None]:
def run_sample_election(model: CandidateModel, process: ElectionConstructor, population: NDPopulation, timings: Timings):
    """Simulate one six-candidate election and report who won.

    Three of the six slots are drawn at random for the model.  In those slots
    the model picks the ideology, given the candidates entered so far,
    provided the model is ready.  Every other slot gets the ideology of a
    voter sampled from the population.  1000 voters then cast ballots and
    ``process`` determines the winner.

    Args:
        model: supplies ``ready()`` and ``choose_ideology()``.
        process: election constructor whose ``run`` yields a result with ``winner()``.
        population: source of random voters.
        timings: accepted for interface compatibility; not used here.

    Returns:
        ``(winner, candidates, balance)`` where ``balance`` is the
        representation measure of the winner against the voters.
    """
    n_slots = 6
    model_slots = set(np.random.choice(range(n_slots), 3, replace=False))

    candidates = []
    for slot in range(n_slots):
        # ready() is only consulted for slots reserved for the model.
        if slot in model_slots and model.ready():
            label = "m-" + str(slot)
            ideology = Ideology(model.choose_ideology(candidates))
        else:
            label = "r-" + str(slot)
            ideology = population.unit_sample_voter().ideology
        candidates.append(Candidate(label, Independents, ideology, 0))

    voters = population.generate_unit_voters(1000)
    ballots = [Ballot(voter, candidates, unit_election_config) for voter in voters]
    winner = process.run(ballots, set(candidates)).winner()

    return winner, candidates, measure_representation(winner, voters)


In [None]:


def train_candidate_model(model: CandidateModel, process: ElectionConstructor, population: NDPopulation):
    """Train ``model`` on simulated elections until it reaches 5000 training steps.

    Every iteration runs one sample election and stores one sample per
    candidate, where each candidate only observes the candidates that entered
    before it.  Once the model reports ready, each iteration also performs one
    training step.  Statistics are printed every 10 steps below 100, every
    100 steps below 1000, and every 1000 steps after that, and timings are
    printed at the end.

    Args:
        model: the candidate model to train.
        process: election constructor used for the sample elections.
        population: voter population used for the sample elections.
    """
    timings = Timings()
    stats = ModelStats()
    first = True
    while model.global_step < 5000:
        with timings.time_block("run_election"):
            # run_sample_election returns three values; unpacking only two
            # raised ValueError on the first iteration.
            winner, candidates, balance = run_sample_election(model, process, population, timings)
        with timings.time_block("add_sample"):
            for i, candidate in enumerate(candidates):
                model.add_sample_from_candidates(candidate, candidates[0:i], winner)

        if model.ready():
            if first:
                print("starting to train")
                first = False

            # Same three-argument call that check_stats makes.
            stats.update(winner, candidates, balance)
            with timings.time_block("model.train"):
                model.train(128)
            s = model.global_step
            if (s < 100 and s % 10 == 0) or (s < 1000 and s % 100 == 0) or s % 1000 == 0:
                stats.print(process.name, model.global_step)
                if model.global_step < 10000:
                    stats.reset()

    timings.print()

In [None]:
def check_stats(stats: ModelStats, model: CandidateModel, process: ElectionConstructor, population: NDPopulation):
    """Run 1000 sample elections and accumulate each outcome into ``stats``.

    Args:
        stats: accumulator, updated in place with winner, candidates and balance.
        model: candidate model taking part in the elections.
        process: election constructor used for the sample elections.
        population: voter population used for the sample elections.
    """
    timings = Timings()
    # The unused `results` list and the unused loop index were removed.
    for _ in range(1000):
        winner, candidates, balance = run_sample_election(model, process, population, timings)
        stats.update(winner, candidates, balance)

In [None]:
def run_parameter_set(process: ElectionConstructor, ibins: int, dim: int):
    """Obtain a model for one parameter combination and collect its statistics.

    A pickled model at the save path for ``(process.name, ibins, dim)`` is
    loaded when it exists; otherwise a fresh model is trained.  The model is
    then evaluated with ``check_stats``.

    Args:
        process: election constructor to train and evaluate against.
        ibins: number of ideology bins.
        dim: number of ideology dimensions.

    Returns:
        ``(stats, model)``.
    """
    save_path = "models/cm-%s-%03d-%dD.p" % (process.name, ibins, dim)
    model, population = create_model_and_population(ibins, dim)

    if not os.path.exists(save_path):
        train_candidate_model(model, process, population)
        # Saving the model file is not working at this time.
        # model.save_to_file(save_path)
    else:
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # only load model files that this project wrote itself.
        with open(save_path, "rb") as saved:
            model = pickle.load(saved)

    stats = ModelStats()
    check_stats(stats, model, process, population)
    return stats, model

In [None]:
# Experiment driver: train or load, then evaluate, one model per combination
# of bin count, election process and ideology dimension.
dims = [4]
processes = [
    ElectionConstructor(constructor=construct_irv, name="Instant Runoff"),
    ElectionConstructor(constructor=construct_h2h, name="Head-to-Head")
]

# One ProcessResult per (bins, process, dim) combination, in run order.
results = []
for bins in [64, 128]:
    for process in processes:
        for dim in dims:
            stats, model = run_parameter_set(process, bins, dim)
            results.append(ProcessResult(process, bins, dim, stats))
            # Print each result as soon as it is available.
            results[-1].print()

# Print every result again once all runs are complete.
for r in results:
    r.print()

In [None]:
import numpy as np
import tensorflow as tf
# Scratch check of tf.gather: select the elements at positions 10, 20, 30 and
# 40 from the integers 0..99.
x = np.arange(100)
tf.gather(x, [10, 20, 30, 40])