# Execution environment

In [23]:
!pip install kaggle-environments --upgrade


Requirement already up-to-date: kaggle-environments in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages (1.0.12)


In [1]:
print("Import started")
from kaggle_environments import make
from kaggle_environments.envs.halite.helpers import *
import random
import math
import numpy as np
import os, glob
print("Import ended")

def store_list_of_arr(arr,path):
    a = open(path,'w')
    for i in arr:
        a.write(" ".join(list(map(str,i.tolist()))))
        a.write("\n")
    a.close()

Import started
Import ended


In [2]:
environment = make("halite", configuration={"size": 21, "startingHalite": 24000,"episodeSteps": 400}, debug=True)
agent_count = 1
environment.reset(agent_count)
state = environment.state[0]
board = Board(state.observation, environment.configuration)

# Manual Run

In [13]:
environment.reset(agent_count)
environment.run(['geneticBot.py','old/py/geneticBotv1.4.py','old/py/geneticBotv1.4.py','geneticBot.py'])
#environment.run(['farmBot.py'])
environment.render(mode="ipython", width=700, height=400)

# Trainer


In [9]:
# Runner and fitness
def run(agents,size=15,steps=400,seed=1):
    env = make("halite", configuration={"size": size, "startingHalite": ((24000/441)*size**2)//1,"episodeSteps": steps,'randomSeed':seed}, debug=False)
    env.reset(len(agents))
    env.run(agents)
    return env

def fitness_halite(env):
    state = env.state[0]
    reward = state.reward
    return reward

In [13]:
# Agent evaluation techniques
def double_fitness(agent, n, a):
    # Run 5 1v1 against comp1.py and take average fitness_halite
    res = 0
    for randomSeed in range(a,n+a):
        print("Seed-",randomSeed)
        res += fitness_halite(run([agent,'old/py/badBotv1.0.py'],seed=randomSeed)) / n
    return res

def test_fitness(weights):
    return sum(weights)


## Init + actual training

In [74]:
OLDBOT_COMPARISON = 9218
TRAIN_TARGET = 'geneticBot'
WEIGHT_SIZE = [4,4,1,1,2]

In [94]:
init_weights = []

# Uniform crossover
def crossover(parent1,parent2):
    if parent1.shape != parent2.shape:
        print("Shapes must be the same!")
    result = parent1.copy()
    cross = np.random.choice([True,False],parent1.shape)
    result[cross] = parent2[cross]
    return result

# Uniform mutation by step
def mutation(target,step):
    target = target.astype('float64')
    res = target.copy()
    res += np.random.uniform(-step,step,res.shape)
    return res

def reset():
    files = glob.glob('trainweights/*')
    for f in files:
        os.remove(f)

def build(weights):
    store_list_of_arr(weights,TRAIN_TARGET+'/weights.txt')
    os.system("python3 build.py "+TRAIN_TARGET)

def convert(weights):
    # Converts a thing to a program readable list of arrays
    a = 0
    res = []
    for i in WEIGHT_SIZE:
        res.append(weights[a:a+i])
        a+=i
    return res

# Load all weights in trainweights
def load():
    res = []
    for filepath in glob.iglob('trainweights/*.txt'):
        file = open(filepath,'r')
        a = file.read()
        file.close()
        res.append(np.array(a.split()))
    return res

# Just a simple GA
def simple_train(population,step,iterations,initial=None):
    # Build new 
    N = sum(WEIGHT_SIZE)
    batch = None

    #Should we initialize with random
    if initial != None:
        batch = initial
        a = 0
        if len(initial) != population:
            a = population - len(inital)
        for i in range(a):
            batch.append(np.random.uniform(-step*10,step*10,(N)))
    else:
        batch = [np.random.uniform(-step*10,step*10,(N))for pop in range(population)]

    for i in range(iterations):

        print("====================")
        print("Iteration", i, "starting")
        if i % 1 == 0: #Tunable
            print("Saving all weights")
            reset()
            for j,agent in enumerate(batch):
                store_list_of_arr(convert(agent),'trainweights/'+str(j)+".txt")

        a = random.randint(10,100)
        print("Training seed ")
        # Training
        scores = []
        for agent in batch:
            build(convert(agent))
            scores.append([double_fitness(TRAIN_TARGET+".py",1,a),agent])
            # Shit never actually built a shipyard (most likely)
            if scores[-1][0] == 5000.0:
                scores[-1][0] = 0
            print("Agent calculated",scores[-1][0])
        scores.sort(reverse=True,key=lambda x:x[0])

        # TODO: Account for diversity
        print("Maximum: ",max(scores,key=lambda x:x[0]))

        # Take the top 25%
        top = population // 4
        stay = [x[1] for x in scores[0:top]]
        mutate = [mutation(x,step) for x in stay]
        cross = [crossover(random.choice(stay),random.choice(stay)) for x in stay]
        both = [mutation(crossover(random.choice(stay),random.choice(stay)),step) for x in stay]
        batch = stay + mutate + cross + both
    

In [99]:
simple_train(40,0.3,15,load())

culated 19102.0
Seed- 21
Agent calculated 5272.0
Seed- 21
Agent calculated 15948.0
Seed- 21
Agent calculated 19956.0
Seed- 21
Agent calculated 15358.0
Seed- 21
Agent calculated 15720.0
Seed- 21
Agent calculated 50690.0
Seed- 21
Agent calculated 14748.0
Seed- 21
Agent calculated 12600.0
Seed- 21
Agent calculated 16703.0
Seed- 21
Agent calculated 12407.0
Seed- 21
Agent calculated 16915.0
Seed- 21
Agent calculated 18766.0
Seed- 21
Agent calculated 22334.0
Seed- 21
Agent calculated 3825.0
Seed- 21
Agent calculated -390.0
Seed- 21
Agent calculated 13185.0
Seed- 21
Agent calculated 17513.0
Maximum:  [50690.0, array(['-2.291619034234425', '-1.74799449484775', '0.8592832999061621',
       '1.0822505781551683', '-1.8036841973312157', '-2.5062533862947647',
       '-0.16265622191617668', '-0.4116750259981137',
       '0.4403939744628649', '2.369336496135846', '0.13686425551800863',
       '-0.8912618115053506'], dtype='<U20')]
Iteration 3 starting
Saving all weights
Training seed 
Seed- 100
Agen