In [1]:
import os
import pickle
import random

import numpy as np
from deap import base
from deap import creator
from deap import tools
from scipy.stats import mannwhitneyu

# Neural network

In [2]:
class MLP(object):
    """Feed-forward network with two ReLU hidden layers and a softmax output.

    A constant bias input is appended to the input layer only; the hidden
    layers have no bias nodes. All weights can be exported to and imported
    from one flat list, so an evolutionary algorithm can treat them as a
    genome.

    Args:
        numInput: Number of external inputs (the bias node is added on top).
        numHidden1: Number of units in the first hidden layer.
        numHidden2: Number of units in the second hidden layer.
        numOutput: Number of output units.
    """

    def __init__(self, numInput=3, numHidden1=16, numHidden2=16, numOutput=4):
        self.fitness = 0
        self.numInput = numInput + 1  # +1: bias node feeding hidden layer 1 only
        self.numHidden1 = numHidden1
        self.numHidden2 = numHidden2
        self.numOutput = numOutput

        # Each matrix is shaped (units in next layer, units in this layer) so
        # a layer is evaluated as np.dot(weights, activations).
        self.w_i_h1 = np.random.randn(self.numHidden1, self.numInput)
        self.w_h1_h2 = np.random.randn(self.numHidden2, self.numHidden1)
        self.w_h2_o = np.random.randn(self.numOutput, self.numHidden2)

    def ReLU(self, x):
        """Return max(0, x); works on scalars and element-wise on arrays."""
        return np.maximum(0, x)

    def softmax(self, x):
        """Return the softmax of ``x``.

        The maximum is subtracted before exponentiating so that large
        activations do not overflow.
        """
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()

    def feedForward(self, inputs):
        """Run one forward pass and return the output probabilities.

        Args:
            inputs: Sequence (list, tuple or array) holding one value per
                external input. It is not modified.

        Returns:
            A 1-D numpy array of length ``numOutput`` that sums to 1.

        Raises:
            ValueError: If ``inputs`` does not hold ``numInput - 1`` values.
        """
        values = np.asarray(inputs, dtype=float).ravel()
        if values.size != self.numInput - 1:
            raise ValueError(
                "expected %d inputs, got %d" % (self.numInput - 1, values.size))
        inputsBias = np.append(values, 1.0)           # Add bias input

        h1 = self.ReLU(np.dot(self.w_i_h1, inputsBias))  # hidden layer 1
        h2 = self.ReLU(np.dot(self.w_h1_h2, h1))         # hidden layer 2
        output = np.dot(self.w_h2_o, h2)                 # output layer
        return self.softmax(output)

    def getWeightsLinear(self):
        """Return every weight as one flat list.

        The order is input->hidden1, hidden1->hidden2, hidden2->output, each
        matrix flattened row by row; ``setWeightsLinear`` expects the same
        layout.
        """
        layers = (self.w_i_h1, self.w_h1_h2, self.w_h2_o)
        return np.concatenate([w.ravel() for w in layers]).tolist()

    def setWeightsLinear(self, Wgenome):
        """Load all weights from a flat genome.

        Args:
            Wgenome: Sequence of ``getIndSize()`` numbers in the layout
                produced by ``getWeightsLinear``.

        Raises:
            ValueError: If the genome length does not match the network.
        """
        # np.array copies, so the network never aliases the caller's genome.
        genome = np.array(Wgenome, dtype=float).ravel()
        if genome.size != self.getIndSize():
            raise ValueError(
                "genome has %d weights, network needs %d"
                % (genome.size, self.getIndSize()))

        numWeights_I_H1 = self.numHidden1 * self.numInput
        numWeights_H1_H2 = self.numHidden2 * self.numHidden1
        split = numWeights_I_H1 + numWeights_H1_H2

        self.w_i_h1 = genome[:numWeights_I_H1].reshape(
            (self.numHidden1, self.numInput))
        self.w_h1_h2 = genome[numWeights_I_H1:split].reshape(
            (self.numHidden2, self.numHidden1))
        self.w_h2_o = genome[split:].reshape(
            (self.numOutput, self.numHidden2))

    def getIndSize(self):
        """Return the total number of weights, i.e. the genome length."""
        ind_size = (((self.numInput) * self.numHidden1) +
                    (self.numHidden1 * self.numHidden2) +
                    (self.numHidden2 * self.numOutput))
        return ind_size

# Genetic Algorithm

In [3]:
class GA:
    """Genetic algorithm whose individuals are scored by playing each other.

    Fitness is not computed per individual in isolation: ``evaluate`` lets
    randomly drawn pairs from the current population play rounds in a
    ``Society`` and uses each player's average score as its fitness.

    Args:
        hyperparams: Indexable sequence read positionally as
            [MUTPB, CXPB, ITERATIONS, NGEN, POPSIZE, GAMES, NONADAPT].
        toolbox: Object providing ``population``, ``select``, ``clone``,
            ``mate`` and ``mutate`` (a configured DEAP toolbox).
        stats: Object whose ``compile(population)`` returns a dict of
            statistics to record each generation.
        net: Network used to decode genomes; must offer ``getIndSize``.
    """

    def __init__(self, hyperparams, toolbox, stats, net):
        self.MUTPB = hyperparams[0]
        self.CXPB = hyperparams[1]
        self.ITERATIONS = hyperparams[2]
        self.NGEN = hyperparams[3]
        self.POPSIZE = hyperparams[4]
        self.GAMES = hyperparams[5]
        self.NONADAPT = hyperparams[6]

        self.myNet = net
        self.IND_SIZE = self.myNet.getIndSize()

        self.toolbox = toolbox
        self.stats = stats

    def evolve(self):
        """Run ``NGEN`` generations from a fresh population.

        Returns:
            The ``tools.Logbook`` holding one statistics record per
            generation.
        """
        logbook = tools.Logbook()
        self.pop = self.toolbox.population(n=self.POPSIZE)
        self._score_population()

        for g in range(self.NGEN):
            offspring = self.toolbox.select(self.pop, len(self.pop))
            offspring = list(map(self.toolbox.clone, offspring))
            if self.CXPB > 0.0:
                for child1, child2 in zip(offspring[::2], offspring[1::2]):
                    if random.random() < self.CXPB:
                        self.toolbox.mate(child1, child2)
                        del child1.fitness.values
                        del child2.fitness.values

            for mutant in offspring:
                self.toolbox.mutate(mutant)
                del mutant.fitness.values

            # Install the offspring BEFORE evaluating: evaluate() plays games
            # among self.pop, so scoring first would hand the new individuals
            # fitnesses that were measured on their parents.
            self.pop[:] = offspring
            self._score_population()

            record = self.stats.compile(self.pop)
            logbook.record(gen=g, **record)

            if g % 100 == 0:
                print("-- Generation %i --" % g)
                pop_fitnesses = [ind.fitness.values for ind in self.pop]
                pop_average = round(np.mean(pop_fitnesses), 2)
                print("Avg: " + str(pop_average))
        return logbook

    def _score_population(self):
        """Evaluate the current population and store each fitness.

        Every individual is rescored, because a fitness here only has
        meaning relative to the opponents in the same population.
        """
        for ind, fit in zip(self.pop, self.evaluate()):
            ind.fitness.values = fit

    def evaluate(self):
        """Play ``GAMES`` random pairings and return per-player fitness.

        Returns:
            A list aligned with ``self.pop``; each entry is a 1-tuple with
            the player's mean score per game played, or ``(0.0,)`` for a
            player that was never drawn.
        """
        # One independent [games_played, total_score] pair per player.
        # ([[0, 0]] * n would alias a single list across all players and
        # give everybody the same fitness.)
        pop_stats = [[0, 0] for _ in self.pop]
        players = list(range(len(self.pop)))
        society = Society(players, self.pop, self.myNet, self.NONADAPT)

        for _ in range(self.GAMES):
            # NOTE(review): both draws are independent, so a player can be
            # paired with itself -- confirm that self-play is intended.
            player1 = players[random.randrange(len(players))]
            player2 = players[random.randrange(len(players))]
            score1, score2 = society.run_round(player1, player2)
            pop_stats[player1][0] += 1
            pop_stats[player2][0] += 1
            pop_stats[player1][1] += score1
            pop_stats[player2][1] += score2

        return [(total / games if games else 0.0,)
                for games, total in pop_stats]

# Simulation

In [4]:
class Society:
    """Keeps each player's current society and plays pairwise rounds.

    Societies are encoded as integers: 0 = saints, 1 = buddies,
    2 = fight_club, 3 = vandals. Behaviours are encoded as 0 = cooperate and
    1 = be selfish.

    Args:
        players: Iterable of player keys; each key also indexes ``brains``.
        brains: Indexable collection of flat weight genomes, one per player.
        net: Network offering ``setWeightsLinear`` and ``feedForward``.
        non_adapt: When equal to ``True``, societies are drawn at random and
            never updated after a round.
    """

    def __init__(self, players, brains, net, non_adapt=False):
        self.players = dict.fromkeys(players)  # society unknown until set
        self.brains = brains
        self.net = net
        self.non_adapt = non_adapt
        self.set_initial_socs()

    def set_initial_socs(self):
        """Give every player its starting society from a fixed probe input."""
        probe = [-1] * 3  # TODO Find right initial values for net
        for player in self.players:
            self.net.setWeightsLinear(self.brains[player])
            self.set_soc(player, self.net.feedForward(probe))

    def set_soc(self, player, output):
        """Store the society chosen for ``player``.

        The choice is the index of the largest entry in ``output``, unless
        the society is non-adaptive, in which case it is a uniform random
        integer in [0, 3].
        """
        choice = np.argmax(output, axis=0)
        if self.non_adapt == True:
            choice = random.randint(0, 3)
        # Saints encoded to 0, buddies encoded to 1, fight_club encoded to 2,
        # vandals encoded to 3
        for code in (0, 1, 2, 3):
            if choice == code:
                self.players[player] = code

    def run_round(self, player1, player2):
        """Play one round between two players and return their points.

        In an adaptive society both players afterwards pick a (possibly new)
        society based on the outcome.

        Returns:
            Tuple ``(player1 points, player2 points)``.
        """
        self.player1, self.player2 = player1, player2
        self.get_behaviour()
        self.get_points()
        if self.non_adapt == False:
            self.normalize_data()
            self.make_decisions()
        return self.p1_points, self.p2_points

    @staticmethod
    def _action(own, other):
        """Return the behaviour a member of ``own`` shows towards ``other``.

        Returns ``None`` when ``own`` is not one of the four known societies.
        """
        if own == 0:                      # saints always cooperate
            return 0
        if own == 1:                      # buddies cooperate only with buddies
            return 0 if other == 1 else 1
        if own == 2:                      # fight_club is selfish only within itself
            return 1 if other == 2 else 0
        if own == 3:                      # vandals are always selfish
            return 1
        return None

    def get_behaviour(self):
        """Look up both societies and derive each player's behaviour."""
        # 0 for cooperate, 1 for be selfish
        self.p1_soc = self.players[self.player1]
        self.p2_soc = self.players[self.player2]

        first = self._action(self.p1_soc, self.p2_soc)
        if first is not None:
            self.p1_behav = first

        second = self._action(self.p2_soc, self.p1_soc)
        if second is not None:
            self.p2_behav = second

    def get_points(self):
        """Translate the two behaviours into points for each player."""
        opponent_cooperates = self.p2_behav == 0
        if self.p1_behav == 0:
            payoff = (4, 4) if opponent_cooperates else (0, 6)
            self.p1_points, self.p2_points = payoff
        elif self.p1_behav == 1:
            payoff = (6, 0) if opponent_cooperates else (1, 1)
            self.p1_points, self.p2_points = payoff

    def normalize_data(self):
        """Store player 1's point lead as a fraction of all points awarded."""
        lead = self.p1_points - self.p2_points
        total = self.p1_points + self.p2_points
        self.relative_points = lead / total

    def make_decisions(self):
        """Let both players choose their next society from the round result.

        Each network sees the relative points from its own side (sign
        flipped for player 2), its own society and the opponent's society as
        they were during the round.
        """
        perspectives = (
            (self.player1, self.relative_points, self.p1_soc, self.p2_soc),
            (self.player2, self.relative_points * -1, self.p2_soc, self.p1_soc),
        )
        for player, outcome, own_soc, other_soc in perspectives:
            self.net.setWeightsLinear(self.brains[player])
            output = self.net.feedForward([outcome, own_soc, other_soc])
            self.set_soc(player, output)

# Data saver

In [5]:
class Saver:
    """Pickles objects into a directory under numbered, non-clashing names.

    Args:
        path: Directory that receives the files; created on first save.
    """

    def __init__(self, path):
        self.path = path

    def _numbered(self, count):
        """Return the path for ``self.name`` with a 3-digit ``count`` suffix."""
        return os.path.join(self.path, self.name + '_' + str(count).zfill(3))

    def save(self, data, name):
        """Pickle ``data`` to the first unused ``<name>_NNN`` file.

        Numbering starts at 000. Exactly one file is written per call and
        existing files are never overwritten.

        Args:
            data: Any picklable object.
            name: File name stem.
        """
        self.name = name
        self.data = data
        os.makedirs(self.path, exist_ok=True)

        count = 0
        self.file_path = self._numbered(count)
        while os.path.exists(self.file_path):
            count += 1
            self.file_path = self._numbered(count)
        self.dump()

    def dump(self):
        """Write ``self.data`` to ``self.file_path`` as a pickle."""
        with open(self.file_path, 'wb') as f:
            pickle.dump(self.data, f)

# Data formatter

In [6]:

class Data_formatter:
    """Averages repeated runs per version and compares versions statistically.

    Args:
        versions: Mapping from version name to a non-empty list of logbooks.
            Each logbook must offer ``select(key)`` returning the per-
            generation values for 'gen', 'min', 'max', 'avg' and 'std'.

    Attributes set here: ``formatted_vers`` maps each version name to a dict
    with the generation numbers ('gens'), the per-generation curves averaged
    over runs ('min', 'max', 'avg', 'std') and three scalars: 'mean' and
    'stdv' of the averaged 'avg' curve and 'max_mean' of the averaged 'max'
    curve.
    """

    def __init__(self, versions):
        self.versions = versions
        self.formatted_vers = {}
        gen_list = []
        for ver in self.versions:
            logbooks = self.versions[ver]
            gens = logbooks[0].select('gen')
            gen_list.append(gens)
            _min, _max, _avg, _std = self.get_all_means(logbooks)
            self.formatted_vers[ver] = {
                'gens': gens, 'min': _min, 'max': _max, 'avg': _avg,
                'std': _std, 'mean': np.mean(_avg),
                'max_mean': np.mean(_max), 'stdv': np.std(_avg),
            }
        # Lists compare lexicographically, so for generation lists that all
        # count up from the same start this picks the shortest one.
        self.gen = min(gen_list, default=[])

    def get_all_means(self, logs):
        """Return the run-averaged 'min', 'max', 'avg' and 'std' curves."""
        return tuple(self.get_mean(logs, key)
                     for key in ('min', 'max', 'avg', 'std'))

    def get_mean(self, logbooks, selection):
        """Average one logged statistic across runs, generation by generation.

        The result has one entry per generation of the first logbook. Extra
        generations in longer logbooks are ignored, and a generation missing
        from a shorter logbook is averaged over the runs that do have it.

        Args:
            logbooks: Sequence of logbooks (see class docstring).
            selection: Key of the statistic to average, e.g. 'avg'.

        Returns:
            List of per-generation means; empty when ``logbooks`` is empty.
        """
        if not logbooks:
            return []
        length = len(logbooks[0].select('gen'))
        totals = [0] * length
        counts = [0] * length
        for logbook in logbooks:
            for i, value in enumerate(logbook.select(selection)[:length]):
                totals[i] += value
                counts[i] += 1
        return [total / count if count else 0.0
                for total, count in zip(totals, counts)]

    def save_stats(self, save_path, versions):
        """Write a plain-text summary to ``<save_path>/stats``.

        The file lists Mann-Whitney p-values for every unordered pair of
        versions, followed by mean, standard deviation and mean-of-max for
        each version.

        Args:
            save_path: Target directory; created if it does not exist.
            versions: List of version names (keys of ``formatted_vers``).
        """
        parts = []
        for i, ver1 in enumerate(versions):
            for ver2 in versions[i + 1:]:
                avg_p, max_p = self.mann_test([ver1, ver2])
                parts.append('{0} against {1}\n'.format(ver1, ver2))
                parts.append('avg_p: {0} \nmax_p: {1}\n\n'.format(
                    round(avg_p, 3), round(max_p, 3)))
        for ver in versions:
            summary = self.formatted_vers[ver]
            parts.append('\n' + ver)
            parts.append('\nmean: {0} \nstdv: {1}\nmax_mean: {2}\n'.format(
                round(summary['mean'], 3), round(summary['stdv'], 3),
                round(summary['max_mean'], 3)))

        if save_path:
            os.makedirs(save_path, exist_ok=True)
        with open(os.path.join(save_path, 'stats'), "w") as text_file:
            text_file.write(''.join(parts))

    def mann_test(self, versions):
        """Return Mann-Whitney p-values for the 'avg' and 'max' curves.

        Args:
            versions: Two version names.

        Returns:
            Tuple ``(avg_p, max_p)``.
        """
        first = self.formatted_vers[versions[0]]
        second = self.formatted_vers[versions[1]]
        _, avg_p = mannwhitneyu(first['avg'], second['avg'])
        _, max_p = mannwhitneyu(first['max'], second['max'])
        return avg_p, max_p

    def u_value(self, versions):
        """Count generation-wise wins between two 'avg' curves.

        Compares the curves position by position (up to the shorter length)
        and returns the smaller of "first strictly greater" and "otherwise".
        NOTE(review): this is a paired count, not the rank-based
        Mann-Whitney U statistic the name suggests -- confirm intent.
        """
        group_a = self.formatted_vers[versions[0]]['avg']
        group_b = self.formatted_vers[versions[1]]['avg']
        compared = min(len(group_a), len(group_b))
        u1 = sum(1 for a, b in zip(group_a, group_b) if a > b)
        return min(u1, compared - u1)

    def one_tailed(self, versions):
        """Permutation-test p-value for the difference of 'avg' means.

        The two curves are pooled and randomly re-split 9999 times; the
        p-value is the share of re-splits whose absolute mean difference is
        at least the observed absolute difference (with the usual +1
        correction). Because absolute differences are used, the test is
        two-sided despite the method name.
        """
        iterations = 9999
        data_a = self.formatted_vers[versions[0]]['avg']
        data_b = self.formatted_vers[versions[1]]['avg']
        diffs = []
        for _ in range(iterations):
            list_a, list_b = self.shuffle_groups(data_a, data_b)
            meanA = sum(list_a) / len(list_a)
            meanB = sum(list_b) / len(list_b)
            diffs.append(abs(meanA - meanB))
        actual_diff = abs(np.mean(data_a) - np.mean(data_b))
        counter = sum(1 for diff in diffs if diff >= actual_diff)
        return (counter + 1) / (iterations + 1)

    def shuffle_groups(self, list_a, list_b):
        """Pool both groups, shuffle, and re-split at the original sizes."""
        list_both = list(list_a) + list(list_b)
        random.shuffle(list_both)
        cut = len(list_a)
        return (list_both[:cut], list_both[cut:])



# Graph visualizer

# Stats tester

# GA executer

In [7]:
class Master:
    """Configures the genetic algorithm, runs experiments and saves results.

    ``hyperparams`` is a positional list:
    [mutation prob, crossover prob, iterations, generations, population
    size, games per evaluation, non-adaptive flag].
    """

    def __init__(self):
        self.set_defaults()
        self.stat_path = os.path.join('..','stats')

    def set_defaults(self):
        """Reset ``self.hyperparams`` to the default experiment settings."""
        mut = 0.3
        cxpb = 0.0
        its = 5
        ngen = 5000
        popsize = 50
        games = 500
        non_adapt = False
        self.hyperparams = [mut, cxpb, its, ngen, popsize, games, non_adapt]

    def set_ga(self):
        """Build the DEAP toolbox and statistics and create ``self.ga``."""
        myNet = MLP()
        IND_SIZE = myNet.getIndSize()

        toolbox = base.Toolbox()
        # run() is called several times per session; only create the DEAP
        # classes once instead of re-creating them on every call.
        if not hasattr(creator, "FitnessMax"):
            creator.create("FitnessMax", base.Fitness, weights=(1.0,))
        if not hasattr(creator, "Individual"):
            creator.create("Individual", list, fitness=creator.FitnessMax)

        toolbox.register("attr_float", random.uniform, -1.0, 1.0)
        toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_float, n=IND_SIZE)

        toolbox.register("select", tools.selTournament, tournsize=6)
        toolbox.register("mutate", tools.mutGaussian, mu=0.0, sigma=0.5, indpb=self.hyperparams[0])
        toolbox.register("mate", tools.cxUniform, indpb=self.hyperparams[1])
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)

        stats = tools.Statistics(key=lambda ind: ind.fitness.values)
        stats.register("avg", np.mean)
        stats.register("std", np.std)
        stats.register("min", np.min)
        stats.register("max", np.max)

        self.ga = GA(self.hyperparams, toolbox, stats, myNet)

    def run(self):
        """Run the configured GA once per iteration and keep every logbook.

        ``self.logbooks`` becomes a list with one logbook per iteration
        (``hyperparams[2]``), which is the shape ``get_log_avg`` and
        ``Data_formatter`` iterate over. Every ``evolve`` call starts from a
        fresh random population.
        """
        self.set_ga()
        self.logbooks = [self.ga.evolve()
                         for _ in range(self.hyperparams[2])]

    def get_log_avg(self):
        """Average the 'avg' statistic over all logbooks, per generation.

        Stores the result in ``self.log_avg`` and also returns it. Logbooks
        of unequal length are truncated to the shortest one.
        """
        runs = len(self.logbooks)
        per_run = [[record['avg'] for record in logbook]
                   for logbook in self.logbooks]
        # Divide by the number of runs exactly once.
        self.log_avg = [sum(column) / runs for column in zip(*per_run)]
        return self.log_avg

    def _sweep(self, index, label):
        """Sweep ``hyperparams[index]`` over 0.0 .. 0.9 with the other rate at 0.

        ``index`` is 0 (mutation) or 1 (crossover). After each run the mean
        of the averaged fitness curve is used as the key under which the
        run's name is stored in ``self.best_runs``.
        """
        other = 1 - index
        self.best_runs = {}
        for i in range(10):
            self.hyperparams[index] = i/10
            self.hyperparams[other] = 0
            self.name = '{0}:_{1}'.format(label, self.hyperparams[index])
            self.run()
            mean_score = np.mean(self.get_log_avg())
            self.best_runs[mean_score] = self.name

    def mut_sweep(self):
        """Sweep the mutation probability with crossover disabled."""
        self._sweep(0, 'mut')

    def cross_sweep(self):
        """Sweep the crossover probability with mutation disabled."""
        self._sweep(1, 'cross')

    # NOTE: unfinished sketch of a combined sweep, kept for reference.
    #def mut_cross_sweep(self):
    #    saver = Saver(os.path.join(stat_path, 'mut_cross_sweep'))
    #    best_runs = {}
    #    self.hyperparams = set_defaults()
    #    for i in range(10):
    #        for j in range(10):
    #            self.hyperparams[0] = i/10 
    #            self.hyperparams[1] = j/10
    #            name = 'mut:_{0},cross:_{1}'.format(self.hyperparams[0],self.hyperparams[1])
    #            self.logbooks = self.run(self.hyperparams)
    #            #saver.save(self.logbooks, name)
    #            avg_log = get_log_avg(self.logbooks)
    #            mean_score = np.mean(avg_log)
    #            best_runs[mean_score] = name
    #    return best_runs, saver

    def non_adaptive(self):
        """Run with the non-adaptive flag set.

        The flag stays set in ``self.hyperparams`` afterwards; call
        ``set_defaults`` to clear it.
        """
        self.hyperparams[6] = True
        self.name = 'non_adaptive_run'
        self.run()

    def normal_run(self):
        """Run with the hyperparameters as they currently are."""
        self.name = 'best_running_algorithm'
        self.run()

    def save(self):
        """Pickle the latest logbooks under ``<stat_path>/<name>/``."""
        saver = Saver(os.path.join(self.stat_path, self.name))
        saver.save(self.logbooks, self.name)


# Main script

In [8]:
# Run the adaptive experiment first, then the non-adaptive baseline, saving
# the logbooks after each. The order matters: non_adaptive() sets the
# non-adaptive flag on the shared hyperparameters and nothing clears it.
master = Master()
for experiment in (master.normal_run, master.non_adaptive):
    experiment()
    master.save()

-- Generation 0 --
Avg: 2.79


KeyboardInterrupt: 

In [None]:
i