# Ensemble of Geese agents

The ensemble consists of:

* [Smart Geese Trained by Reinforcement Learning](https://www.kaggle.com/yuricat/smart-geese-trained-by-reinforcement-learning)

* [Keras Model trained by imitation learning](http://www.kaggle.com/richardsmith2/keras-model-trained-by-imitation-learning)

* my own tiny model trained using [Let's create your agent by supervised learning!](https://www.kaggle.com/nejumi/let-s-create-your-agent-by-supervised-learning)

Notebook includes pipeline for testing the ensemble agent.

# Import weights from a dataset (to work around the 1 MB limit problem) and write them to the submission files

In [None]:
import pickle

# Load the three serialized weight blobs from the attached dataset directory.
# `weight` and `weight2` are later decoded for the two Keras models, and
# `PARAM` for the torch model's state dict.
# NOTE(review): pickle.load can execute arbitrary code from the file; only use
# it with dataset files you trust.
with open('../input/model-weights/w_tf.pickle', 'rb') as handle:
    weight = pickle.load(handle)
with open('../input/model-weights/w_step.pickle', 'rb') as handle:
    weight2 = pickle.load(handle)
with open('../input/model-weights/w_torch.pickle', 'rb') as handle:
    PARAM = pickle.load(handle)

In [None]:
# Begin submissionans.py with the three weight blobs as Python assignments;
# the code appended later to the same file reads `weight`, `weight2`, `PARAM`.
# `%store w >file` writes the text of `w` to a new file, `>>` appends to it.
# NOTE(review): "%s" relies on str(value) being a valid Python literal
# (true for bytes objects) - confirm the type stored in the pickles.
w = "weight= %s"%weight
%store w >submissionans.py
w = "weight2= %s"%weight2
%store w >>submissionans.py
w = "PARAM= %s"%PARAM
%store w >>submissionans.py

In [None]:
# Begin submission_step.py with the step-model blob. It is stored under the
# name `weight` (not `weight2`) because that is the name the code appended to
# submission_step.py decodes.
w = "weight = %s"%weight2
%store w >submission_step.py

# TF model functions

In [None]:
import bz2
import base64
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, GlobalAveragePooling2D, Dense, Conv2D, Activation, Lambda, Add, BatchNormalization, Input
from tensorflow.keras import backend as K
from tensorflow.keras.regularizers import l1_l2, l2
from kaggle_environments.envs.hungry_geese.hungry_geese import Action, translate


def TorusConv2D(x, ch, kernel, padding="same", strides=1, weight_decay=2e-3):
    """Apply a Conv2D to `x` with wrap-around (torus) borders.

    The input is tiled 3x3 over axes 1 and 2, convolved, and then the
    central third of the result is cropped back out, so cells at one edge
    see the cells of the opposite edge as neighbours.

    Args:
        x: Keras tensor indexed as (batch, axis1, axis2, channels).
        ch: number of convolution filters.
        kernel: convolution kernel size.
        padding: forwarded to Conv2D.
        strides: forwarded to Conv2D.
        weight_decay: L2 factor for the convolution kernel regularizer.

    Returns:
        The cropped convolution output tensor.
    """
    def tile_3x3(t):
        return K.tile(t, n=(1,3,3,1))

    def tiled_shape(input_shape):
        return (None, 3*input_shape[1], 3*input_shape[2], input_shape[3])

    def crop_center(t):
        # Keep only the middle third along both tiled axes.
        rows = int(t.shape[1]/3)
        cols = int(t.shape[2]/3)
        return t[:, rows:2*rows, cols:2*cols, :]

    def cropped_shape(input_shape):
        return (None, int(input_shape[1]/3), int(input_shape[2]/3), input_shape[3])

    tiled = Lambda(tile_3x3, output_shape=tiled_shape)(x)
    convolved = Conv2D(ch, kernel, padding=padding, strides=strides,
                       kernel_regularizer=l2(weight_decay))(tiled)
    return Lambda(crop_center, output_shape=cropped_shape)(convolved)

def conv_bn_relu(x0, ch, kernel, padding="same", strides=1, weight_decay=2e-3, add=False):
    """Torus convolution -> batch normalization -> ReLU, optionally residual.

    Args:
        x0: input Keras tensor.
        ch, kernel, padding, strides, weight_decay: forwarded to TorusConv2D.
        add: when True, the block input `x0` is added to the activated output.

    Returns:
        The activated tensor, or `x0 + activated` when `add` is True.
    """
    # Layers are created in the same order as before (conv, BN, ReLU, Add).
    conv_out = TorusConv2D(x0, ch, kernel, padding=padding, strides=strides,
                           weight_decay=weight_decay)
    normed = BatchNormalization()(conv_out)
    activated = Activation("relu")(normed)
    if not add:
        return activated
    return Add()([x0, activated])

def GeeseNet(input_shape=(7, 11, 17), layers=12, filters=32, weight_decay=2e-3):
    """Build the Keras policy network: a residual torus-conv tower with a softmax head.

    Args:
        input_shape: shape of one input sample (without the batch axis).
        layers: number of residual conv_bn_relu blocks after the first one.
        filters: number of filters in every convolution.
        weight_decay: L2 factor forwarded to every convolution block.
            Previously this argument was accepted but silently ignored.

    Returns:
        A Keras `Model` mapping the input to 4 softmax outputs.
    """
    # `inputs` rather than `input`, so the builtin is not shadowed.
    inputs = Input(input_shape)
    x = conv_bn_relu(inputs, filters, 3, weight_decay=weight_decay)

    for _ in range(layers):
        x = conv_bn_relu(x, filters, 3, weight_decay=weight_decay, add=True)

    x = GlobalAveragePooling2D()(x)

    output = Dense(4, activation='softmax', kernel_regularizer=l1_l2(l1=0.0005, l2=0.0005))(x)
    return Model(inputs, output)

def GeeseNet_step(input_shape=(7, 11, 18), layers=12, filters=32, weight_decay=2e-3):
    """Build the Keras policy network variant used with the 18-plane input.

    The conv tower output is flattened and concatenated with the per-channel
    mean of the raw input before a 64-unit SELU layer and the softmax head.

    Args:
        input_shape: shape of one input sample (without the batch axis).
        layers: number of residual conv_bn_relu blocks after the first one.
        filters: number of filters in every convolution.
        weight_decay: L2 factor forwarded to every convolution block.
            Previously this argument was accepted but silently ignored.

    Returns:
        A Keras `Model` mapping the input to 4 softmax outputs.
    """
    # `inputs` rather than `input`, so the builtin is not shadowed.
    inputs = Input(input_shape)
    x = conv_bn_relu(inputs, filters, 3, weight_decay=weight_decay)

    for _ in range(layers):
        x = conv_bn_relu(x, filters, 3, weight_decay=weight_decay, add=True)

    x = Flatten()(x)
    # Mean of every input channel over the two board axes.
    y = tf.reduce_mean(inputs, axis=[1, 2])

    z = tf.keras.layers.concatenate([x,y])
    x = Dense(64, activation='selu')(z)
    output = Dense(4, activation='softmax', kernel_regularizer=l1_l2(l1=0.000005, l2=0.000005))(x)
    return Model(inputs, output)

def centerize(b):
    """Cyclically shift the board so the first set cell of plane 0 lands at (3, 5).

    Args:
        b: array indexed as (plane, row, col) with 7 rows and 11 columns;
            plane 0 must contain at least one non-zero cell.

    Returns:
        A new array of the same shape with rows and columns rotated.
    """
    head_rows, head_cols = np.where(b[0])
    # Output row i is read from input row (i - 3 + head_row) mod 7; same for columns.
    row_order = (np.arange(0, 7) - 3 + head_rows[0]) % 7
    col_order = (np.arange(0, 11) - 5 + head_cols[0]) % 11
    return np.take(np.take(b, row_order, axis=1), col_order, axis=2)

def make_input(obs,obs_prev):
    """Encode an observation as a (7, 11, 17) float32 array for the Keras model.

    Planes 0-3 hold goose heads, 4-7 tails, 8-11 whole bodies and 12-15 the
    heads from `obs_prev`; within each group the plane offset is the seat
    relative to `obs['index']`, so offset 0 is always the observing goose.
    Plane 16 holds food. The board is then centred with `centerize` and the
    plane axis is moved last.

    Args:
        obs: current observation with 'geese', 'index' and 'food'.
        obs_prev: previous observation, or a falsy value when there is none.
    """
    planes = np.zeros((17, 7 * 11), dtype=np.float32)

    def rel(seat):
        # Seat offset relative to the observing goose.
        return (4 + seat - obs['index']) % 4

    for seat, goose in enumerate(obs['geese']):
        if len(goose) > 0:
            planes[0 + rel(seat), goose[0]] = 1
            planes[4 + rel(seat), goose[-1]] = 1
        for cell in goose:
            planes[8 + rel(seat), cell] = 1

    if obs_prev:
        for seat, goose in enumerate(obs_prev['geese']):
            if len(goose) > 0:
                planes[12 + rel(seat), goose[0]] = 1

    for cell in obs['food']:
        planes[16, cell] = 1

    centered = centerize(planes.reshape(-1, 7, 11))
    return np.transpose(centered, (1,2,0))

def make_input_step(obs,obs_prev):
    """Encode an observation as a (7, 11, 18) float32 array for the step model.

    Same layout as the 17-plane encoding (heads 0-3, tails 4-7, bodies 8-11,
    previous heads 12-15, food 16, seats relative to `obs['index']`) plus
    plane 17, which is written from the step expression below.

    Args:
        obs: current observation with 'geese', 'index', 'food' and optionally 'step'.
        obs_prev: previous observation, or a falsy value when there is none.
    """
    planes = np.zeros((18, 7 * 11), dtype=np.float32)

    if "step" in obs:
        # NOTE(review): zeros * (step / 200) is always zero, so plane 17 never
        # carries the step. Kept as-is because the shipped weights were
        # presumably trained with this exact encoding - confirm before changing.
        planes[17,:] = np.zeros((7 * 11), dtype=np.float32) * (obs['step'] / 200)

    def rel(seat):
        # Seat offset relative to the observing goose.
        return (4 + seat - obs['index']) % 4

    for seat, goose in enumerate(obs['geese']):
        if len(goose) > 0:
            planes[0 + rel(seat), goose[0]] = 1
            planes[4 + rel(seat), goose[-1]] = 1
        for cell in goose:
            planes[8 + rel(seat), cell] = 1

    if obs_prev:
        for seat, goose in enumerate(obs_prev['geese']):
            if len(goose) > 0:
                planes[12 + rel(seat), goose[0]] = 1

    for cell in obs['food']:
        planes[16, cell] = 1

    centered = centerize(planes.reshape(-1, 7, 11))
    return np.transpose(centered, (1,2,0))


# Build both Keras networks and install the pre-trained weights.
# Each blob is base64 text -> bz2-compressed bytes -> pickled weight list.
# NOTE(review): pickle.loads runs arbitrary code from the blob; trusted data only.
model = GeeseNet(input_shape=(7, 11, 17), layers=12, filters=32, weight_decay=1e-7)
model.set_weights(pickle.loads(bz2.decompress(base64.b64decode(weight))))

model_step = GeeseNet_step(input_shape=(7, 11, 18), layers=12, filters=32, weight_decay=1e-7)
model_step.set_weights(pickle.loads(bz2.decompress(base64.b64decode(weight2))))



In [None]:
def getValidMoves(obs, last_obs, index):
    """Return a 4-element list of booleans marking non-blocked moves.

    The order is NORTH, SOUTH, WEST, EAST. A move is blocked when its target
    cell is occupied by any goose segment except that goose's last one, or
    when it is the head position of goose `index` in `last_obs`.

    Args:
        obs: current observation exposing `.geese`.
        last_obs: previous observation exposing `.geese`, or None.
        index: seat of the goose whose moves are checked.
    """
    all_geese = obs.geese
    head = all_geese[index][0]

    blocked = set()
    for goose in all_geese:
        # The final segment of each goose is not treated as an obstacle.
        blocked.update(goose[:-1])
    if last_obs is not None:
        blocked.add(last_obs.geese[index][0])

    moves = (Action.NORTH, Action.SOUTH, Action.WEST, Action.EAST)
    return [translate(head, move, 11, 7) not in blocked for move in moves]


# Action names, in the same order as the mask returned by getValidMoves.
ACTIONS = ['NORTH', 'SOUTH', 'WEST', 'EAST']


# Agent for TF models

In [None]:
class TFAgent:
    """Callable agent wrapping a Keras policy net fed by `make_input`.

    Args:
        net: model called as `net(batch, training=False)`.
        stochastic: when True, `__call__` returns the masked probability
            vector instead of an action name.
    """

    def __init__(self, net, stochastic = False):
        self.prev_obs = None
        self.net = net
        self.stochastic = stochastic

    def __call__(self, observation, configuration):
        """Return an action name, or the probabilities when `stochastic` is set."""
        first_step = observation['step'] == 0
        if first_step:
            # New episode: forget the previous game's last observation.
            self.prev_obs = None
            mask = 1
        else:
            mask = getValidMoves(observation, self.prev_obs, observation.index)

        batch = tf.expand_dims(make_input(observation, self.prev_obs), 0)
        net_out = self.net(batch, training=False)
        # Zero out blocked moves; the epsilon keeps the normalisation finite.
        scores = tf.squeeze(net_out).numpy() * mask + 1e-14
        probs = scores / np.sum(scores)

        self.prev_obs =  observation
        return probs if self.stochastic else ACTIONS[np.argmax(probs)]

# Agent for TF-step model

In [None]:
class TFAgent_step:
    """Callable agent wrapping a Keras policy net fed by `make_input_step`.

    Args:
        net: model called as `net(batch, training=False)`.
        stochastic: when True, `__call__` returns the masked probability
            vector instead of an action name.
    """

    def __init__(self, net, stochastic = False):
        self.prev_obs = None
        self.net = net
        self.stochastic = stochastic

    def __call__(self, observation, configuration):
        """Return an action name, or the probabilities when `stochastic` is set."""
        if observation['step'] != 0:
            mask = getValidMoves(observation, self.prev_obs, observation.index)
        else:
            # New episode: no history and no move mask on the first step.
            self.prev_obs = None
            mask = 1

        encoded = make_input_step(observation, self.prev_obs)
        net_out = self.net(tf.expand_dims(encoded, 0), training=False)
        # Zero out blocked moves; the epsilon keeps the normalisation finite.
        scores = tf.squeeze(net_out).numpy() * mask + 1e-14
        probs = scores / np.sum(scores)
        self.prev_obs =  observation

        if not self.stochastic:
            return ACTIONS[np.argmax(probs)]
        return probs

# Agent for Torch model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TorusConv2d_t(nn.Module):
    """Conv2d (optionally followed by BatchNorm2d) with wrap-around borders.

    The input is extended on each side of dims 2 and 3 with `kernel // 2`
    cells copied from the opposite edge before an unpadded convolution.

    Args:
        input_dim: number of input channels.
        output_dim: number of output channels.
        kernel_size: (height, width) of the convolution kernel.
        bn: whether to apply BatchNorm2d after the convolution.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        self.edge_size = (kernel_size[0] // 2, kernel_size[1] // 2)
        self.conv = nn.Conv2d(input_dim, output_dim, kernel_size=kernel_size)
        self.bn = nn.BatchNorm2d(output_dim) if bn else None

    @staticmethod
    def _wrap(t, edge, dim):
        """Extend `t` along `dim` with `edge` cells wrapped from each opposite end."""
        if edge == 0:
            # A slice written as [-0:] would select the whole axis, not nothing.
            return t
        size = t.size(dim)
        return torch.cat([t.narrow(dim, size - edge, edge), t, t.narrow(dim, 0, edge)], dim=dim)

    def forward(self, x):
        h = self._wrap(x, self.edge_size[1], 3)
        h = self._wrap(h, self.edge_size[0], 2)
        h = self.conv(h)
        h = self.bn(h) if self.bn is not None else h
        return h


class GeeseNet_t(nn.Module):
    """Torch policy/value network: 12 residual torus-conv blocks with 32 filters.

    `forward` returns a dict with 'policy' (4 unnormalised scores) and
    'value' (tanh-squashed scalar) per batch element.
    """

    def __init__(self):
        super().__init__()
        layers, filters = 12, 32
        self.conv0 = TorusConv2d_t(17, filters, (3, 3), True)
        self.blocks = nn.ModuleList([TorusConv2d_t(filters, filters, (3, 3), True) for _ in range(layers)])
        self.head_p = nn.Linear(filters, 4, bias=False)
        self.head_v = nn.Linear(filters * 2, 1, bias=False)

    def forward(self, x):
        h = F.relu_(self.conv0(x))
        for block in self.blocks:
            h = F.relu_(h + block(h))

        batch, channels = h.size(0), h.size(1)
        # Input channel 0 masks the features; the result is summed over the board.
        masked = h * x[:,:1]
        h_head = masked.view(batch, channels, -1).sum(-1)
        # Per-channel mean of the features over the whole board.
        h_avg = h.view(batch, channels, -1).mean(-1)

        policy = self.head_p(h_head)
        value = torch.tanh(self.head_v(torch.cat([h_head, h_avg], 1)))
        return {'policy': policy, 'value': value}

def make_input_t(obs, obs_prev):
    """Encode an observation as a (17, 7, 11) float32 array for the torch model.

    Planes 0-3 hold goose heads, 4-7 tails, 8-11 whole bodies and 12-15 the
    heads from `obs_prev`; within each group the plane offset is the seat
    relative to `obs['index']`. Plane 16 holds food. No centring is applied.

    Args:
        obs: current observation with 'geese', 'index' and 'food'.
        obs_prev: previous observation, or a falsy value when there is none.
    """
    planes = np.zeros((17, 7 * 11), dtype=np.float32)

    def rel(seat):
        # Seat offset relative to the observing goose.
        return (seat - obs['index']) % 4

    for seat, goose in enumerate(obs['geese']):
        if len(goose) > 0:
            planes[0 + rel(seat), goose[0]] = 1
            planes[4 + rel(seat), goose[-1]] = 1
        for cell in goose:
            planes[8 + rel(seat), cell] = 1

    if obs_prev:
        for seat, goose in enumerate(obs_prev['geese']):
            if len(goose) > 0:
                planes[12 + rel(seat), goose[0]] = 1

    for cell in obs['food']:
        planes[16, cell] = 1

    return planes.reshape(-1, 7, 11)


# Decode the torch weights (base64 -> bz2 -> pickle) and load them into the
# network, then switch it to evaluation mode.
# NOTE(review): pickle.loads runs arbitrary code from the blob; trusted data only.
state_dict = pickle.loads(bz2.decompress(base64.b64decode(PARAM)))
model_t = GeeseNet_t()
model_t.load_state_dict(state_dict)
model_t.eval()

class TorchAgent:
    """Callable agent wrapping the torch policy net fed by `make_input_t`.

    Args:
        net: module whose output dict has a 'policy' entry of 4 scores.
        stochastic: when True, `__call__` returns the masked probability
            vector instead of an action name.
    """

    def __init__(self, net, stochastic = False):
        self.prev_obs = None
        self.net = net
        self.stochastic = stochastic

    def __call__(self, observation, configuration):
        """Return an action name, or the probabilities when `stochastic` is set."""
        if observation['step'] == 0:
            # New episode: no history and no move mask on the first step.
            self.prev_obs = None
            valids = 1
        else:
            valids = getValidMoves(observation, self.prev_obs, observation.index)
        x = make_input_t(observation, self.prev_obs)
        with torch.no_grad():
            xt = torch.from_numpy(x).unsqueeze(0)
            o = self.net(xt)
        logits = o['policy'].squeeze(0).detach().numpy()

        # Softmax with the maximum subtracted first: mathematically identical,
        # but exp() can no longer overflow to inf and produce NaN probabilities.
        exps = np.exp(logits - np.max(logits))
        probs = exps / np.sum(exps)
        # Zero out blocked moves; the epsilon keeps the normalisation finite.
        probs = probs * valids + 1e-14
        probs = probs / np.sum(probs)
        self.prev_obs =  observation

        if self.stochastic:
            return probs
        return ACTIONS[np.argmax(probs)]

# Geesamble Agent for testing

`self.w` contains the weight given to each agent's move probabilities in the final decision.

In [None]:
class Geesamble_Agent:
    """Ensemble agent: weighted sum of the three member agents' probabilities.

    Every member is created in stochastic mode, so it returns its own
    (already move-masked) probability vector; `self.w` holds one weight
    per member, shaped (3, 1) so it broadcasts over the 4 actions.
    """

    def __init__(self):
        self.prev_obs = None
        self.agents = [TFAgent(model, True), TFAgent_step(model_step, True), TorchAgent(model_t, True)]
        self.w = np.array([[0.34], [0.33], [0.33]])

    def __call__(self, observation, configuration):
        """Return the action name with the highest weighted probability."""
        if observation['step'] == 0:
            self.prev_obs = None

        # The members mask blocked moves themselves, so the ensemble no longer
        # computes a move mask of its own (it was computed but never used).
        member_probs = np.array([ag(observation, None) for ag in self.agents])
        probs = np.sum(member_probs * self.w, axis = 0)
        self.prev_obs =  observation

        return ACTIONS[np.argmax(probs)]
    
    

# Writing submission

In [None]:
%%writefile -a submissionans.py
import pickle
import bz2
import base64
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, GlobalAveragePooling2D, Dense, Conv2D, Activation, Lambda, Add, BatchNormalization, Input
from tensorflow.keras import backend as K
from tensorflow.keras.regularizers import l1_l2, l2
from kaggle_environments.envs.hungry_geese.hungry_geese import Action, translate



def TorusConv2D(x, ch, kernel, padding="same", strides=1, weight_decay=2e-3):
    """Apply a Conv2D to `x` with wrap-around (torus) borders.

    The input is tiled 3x3 over axes 1 and 2, convolved, and then the
    central third of the result is cropped back out, so cells at one edge
    see the cells of the opposite edge as neighbours.

    Args:
        x: Keras tensor indexed as (batch, axis1, axis2, channels).
        ch: number of convolution filters.
        kernel: convolution kernel size.
        padding: forwarded to Conv2D.
        strides: forwarded to Conv2D.
        weight_decay: L2 factor for the convolution kernel regularizer.

    Returns:
        The cropped convolution output tensor.
    """
    def tile_3x3(t):
        return K.tile(t, n=(1,3,3,1))

    def tiled_shape(input_shape):
        return (None, 3*input_shape[1], 3*input_shape[2], input_shape[3])

    def crop_center(t):
        # Keep only the middle third along both tiled axes.
        rows = int(t.shape[1]/3)
        cols = int(t.shape[2]/3)
        return t[:, rows:2*rows, cols:2*cols, :]

    def cropped_shape(input_shape):
        return (None, int(input_shape[1]/3), int(input_shape[2]/3), input_shape[3])

    tiled = Lambda(tile_3x3, output_shape=tiled_shape)(x)
    convolved = Conv2D(ch, kernel, padding=padding, strides=strides,
                       kernel_regularizer=l2(weight_decay))(tiled)
    return Lambda(crop_center, output_shape=cropped_shape)(convolved)

def conv_bn_relu(x0, ch, kernel, padding="same", strides=1, weight_decay=2e-3, add=False):
    """Torus convolution -> batch normalization -> ReLU, optionally residual.

    Args:
        x0: input Keras tensor.
        ch, kernel, padding, strides, weight_decay: forwarded to TorusConv2D.
        add: when True, the block input `x0` is added to the activated output.

    Returns:
        The activated tensor, or `x0 + activated` when `add` is True.
    """
    # Layers are created in the same order as before (conv, BN, ReLU, Add).
    conv_out = TorusConv2D(x0, ch, kernel, padding=padding, strides=strides,
                           weight_decay=weight_decay)
    normed = BatchNormalization()(conv_out)
    activated = Activation("relu")(normed)
    if not add:
        return activated
    return Add()([x0, activated])

def GeeseNet(input_shape=(7, 11, 17), layers=12, filters=32, weight_decay=2e-3):
    """Build the Keras policy network: a residual torus-conv tower with a softmax head.

    Args:
        input_shape: shape of one input sample (without the batch axis).
        layers: number of residual conv_bn_relu blocks after the first one.
        filters: number of filters in every convolution.
        weight_decay: L2 factor forwarded to every convolution block.
            Previously this argument was accepted but silently ignored.

    Returns:
        A Keras `Model` mapping the input to 4 softmax outputs.
    """
    # `inputs` rather than `input`, so the builtin is not shadowed.
    inputs = Input(input_shape)
    x = conv_bn_relu(inputs, filters, 3, weight_decay=weight_decay)

    for _ in range(layers):
        x = conv_bn_relu(x, filters, 3, weight_decay=weight_decay, add=True)

    x = GlobalAveragePooling2D()(x)

    output = Dense(4, activation='softmax', kernel_regularizer=l1_l2(l1=0.0005, l2=0.0005))(x)
    return Model(inputs, output)

def GeeseNet_step(input_shape=(7, 11, 18), layers=12, filters=32, weight_decay=2e-3):
    """Build the Keras policy network variant used with the 18-plane input.

    The conv tower output is flattened and concatenated with the per-channel
    mean of the raw input before a 64-unit SELU layer and the softmax head.

    Args:
        input_shape: shape of one input sample (without the batch axis).
        layers: number of residual conv_bn_relu blocks after the first one.
        filters: number of filters in every convolution.
        weight_decay: L2 factor forwarded to every convolution block.
            Previously this argument was accepted but silently ignored.

    Returns:
        A Keras `Model` mapping the input to 4 softmax outputs.
    """
    # `inputs` rather than `input`, so the builtin is not shadowed.
    inputs = Input(input_shape)
    x = conv_bn_relu(inputs, filters, 3, weight_decay=weight_decay)

    for _ in range(layers):
        x = conv_bn_relu(x, filters, 3, weight_decay=weight_decay, add=True)

    x = Flatten()(x)
    # Mean of every input channel over the two board axes.
    y = tf.reduce_mean(inputs, axis=[1, 2])

    z = tf.keras.layers.concatenate([x,y])
    x = Dense(64, activation='selu')(z)
    output = Dense(4, activation='softmax', kernel_regularizer=l1_l2(l1=0.000005, l2=0.000005))(x)
    return Model(inputs, output)

def centerize(b):
    """Cyclically shift the board so the first set cell of plane 0 lands at (3, 5).

    Args:
        b: array indexed as (plane, row, col) with 7 rows and 11 columns;
            plane 0 must contain at least one non-zero cell.

    Returns:
        A new array of the same shape with rows and columns rotated.
    """
    head_rows, head_cols = np.where(b[0])
    # Output row i is read from input row (i - 3 + head_row) mod 7; same for columns.
    row_order = (np.arange(0, 7) - 3 + head_rows[0]) % 7
    col_order = (np.arange(0, 11) - 5 + head_cols[0]) % 11
    return np.take(np.take(b, row_order, axis=1), col_order, axis=2)

def make_input(obs,obs_prev):
    """Encode an observation as a (7, 11, 17) float32 array for the Keras model.

    Planes 0-3 hold goose heads, 4-7 tails, 8-11 whole bodies and 12-15 the
    heads from `obs_prev`; within each group the plane offset is the seat
    relative to `obs['index']`, so offset 0 is always the observing goose.
    Plane 16 holds food. The board is then centred with `centerize` and the
    plane axis is moved last.

    Args:
        obs: current observation with 'geese', 'index' and 'food'.
        obs_prev: previous observation, or a falsy value when there is none.
    """
    planes = np.zeros((17, 7 * 11), dtype=np.float32)

    def rel(seat):
        # Seat offset relative to the observing goose.
        return (4 + seat - obs['index']) % 4

    for seat, goose in enumerate(obs['geese']):
        if len(goose) > 0:
            planes[0 + rel(seat), goose[0]] = 1
            planes[4 + rel(seat), goose[-1]] = 1
        for cell in goose:
            planes[8 + rel(seat), cell] = 1

    if obs_prev:
        for seat, goose in enumerate(obs_prev['geese']):
            if len(goose) > 0:
                planes[12 + rel(seat), goose[0]] = 1

    for cell in obs['food']:
        planes[16, cell] = 1

    centered = centerize(planes.reshape(-1, 7, 11))
    return np.transpose(centered, (1,2,0))

def make_input_step(obs,obs_prev):
    """Encode an observation as a (7, 11, 18) float32 array for the step model.

    Same layout as the 17-plane encoding (heads 0-3, tails 4-7, bodies 8-11,
    previous heads 12-15, food 16, seats relative to `obs['index']`) plus
    plane 17, which is written from the step expression below.

    Args:
        obs: current observation with 'geese', 'index', 'food' and optionally 'step'.
        obs_prev: previous observation, or a falsy value when there is none.
    """
    planes = np.zeros((18, 7 * 11), dtype=np.float32)

    if "step" in obs:
        # NOTE(review): zeros * (step / 200) is always zero, so plane 17 never
        # carries the step. Kept as-is because the shipped weights were
        # presumably trained with this exact encoding - confirm before changing.
        planes[17,:] = np.zeros((7 * 11), dtype=np.float32) * (obs['step'] / 200)

    def rel(seat):
        # Seat offset relative to the observing goose.
        return (4 + seat - obs['index']) % 4

    for seat, goose in enumerate(obs['geese']):
        if len(goose) > 0:
            planes[0 + rel(seat), goose[0]] = 1
            planes[4 + rel(seat), goose[-1]] = 1
        for cell in goose:
            planes[8 + rel(seat), cell] = 1

    if obs_prev:
        for seat, goose in enumerate(obs_prev['geese']):
            if len(goose) > 0:
                planes[12 + rel(seat), goose[0]] = 1

    for cell in obs['food']:
        planes[16, cell] = 1

    centered = centerize(planes.reshape(-1, 7, 11))
    return np.transpose(centered, (1,2,0))


# Build both Keras networks and install the pre-trained weights.
# Each blob is base64 text -> bz2-compressed bytes -> pickled weight list.
# NOTE(review): pickle.loads runs arbitrary code from the blob; trusted data only.
model = GeeseNet(input_shape=(7, 11, 17), layers=12, filters=32, weight_decay=1e-7)
model.set_weights(pickle.loads(bz2.decompress(base64.b64decode(weight))))

model_step = GeeseNet_step(input_shape=(7, 11, 18), layers=12, filters=32, weight_decay=1e-7)
model_step.set_weights(pickle.loads(bz2.decompress(base64.b64decode(weight2))))

def getValidMoves(obs, last_obs, index):
    """Return a 4-element list of booleans marking non-blocked moves.

    The order is NORTH, SOUTH, WEST, EAST. A move is blocked when its target
    cell is occupied by any goose segment except that goose's last one, or
    when it is the head position of goose `index` in `last_obs`.

    Args:
        obs: current observation exposing `.geese`.
        last_obs: previous observation exposing `.geese`, or None.
        index: seat of the goose whose moves are checked.
    """
    all_geese = obs.geese
    head = all_geese[index][0]

    blocked = set()
    for goose in all_geese:
        # The final segment of each goose is not treated as an obstacle.
        blocked.update(goose[:-1])
    if last_obs is not None:
        blocked.add(last_obs.geese[index][0])

    moves = (Action.NORTH, Action.SOUTH, Action.WEST, Action.EAST)
    return [translate(head, move, 11, 7) not in blocked for move in moves]



# Action names, in the same order as the mask returned by getValidMoves.
ACTIONS = ['NORTH', 'SOUTH', 'WEST', 'EAST']
class TFAgent:
    """Callable agent wrapping a Keras policy net fed by `make_input`.

    Args:
        net: model called as `net(batch, training=False)`.
        stochastic: when True, `__call__` returns the masked probability
            vector instead of an action name.
    """

    def __init__(self, net, stochastic = False):
        self.prev_obs = None
        self.net = net
        self.stochastic = stochastic

    def __call__(self, observation, configuration):
        """Return an action name, or the probabilities when `stochastic` is set."""
        first_step = observation['step'] == 0
        if first_step:
            # New episode: forget the previous game's last observation.
            self.prev_obs = None
            mask = 1
        else:
            mask = getValidMoves(observation, self.prev_obs, observation.index)

        batch = tf.expand_dims(make_input(observation, self.prev_obs), 0)
        net_out = self.net(batch, training=False)
        # Zero out blocked moves; the epsilon keeps the normalisation finite.
        scores = tf.squeeze(net_out).numpy() * mask + 1e-14
        probs = scores / np.sum(scores)

        self.prev_obs =  observation
        return probs if self.stochastic else ACTIONS[np.argmax(probs)]
class TFAgent_step:
    """Callable agent wrapping a Keras policy net fed by `make_input_step`.

    Args:
        net: model called as `net(batch, training=False)`.
        stochastic: when True, `__call__` returns the masked probability
            vector instead of an action name.
    """

    def __init__(self, net, stochastic = False):
        self.prev_obs = None
        self.net = net
        self.stochastic = stochastic

    def __call__(self, observation, configuration):
        """Return an action name, or the probabilities when `stochastic` is set."""
        if observation['step'] != 0:
            mask = getValidMoves(observation, self.prev_obs, observation.index)
        else:
            # New episode: no history and no move mask on the first step.
            self.prev_obs = None
            mask = 1

        encoded = make_input_step(observation, self.prev_obs)
        net_out = self.net(tf.expand_dims(encoded, 0), training=False)
        # Zero out blocked moves; the epsilon keeps the normalisation finite.
        scores = tf.squeeze(net_out).numpy() * mask + 1e-14
        probs = scores / np.sum(scores)
        self.prev_obs =  observation

        if not self.stochastic:
            return ACTIONS[np.argmax(probs)]
        return probs
import torch
import torch.nn as nn
import torch.nn.functional as F

class TorusConv2d_t(nn.Module):
    """Conv2d (optionally followed by BatchNorm2d) with wrap-around borders.

    The input is extended on each side of dims 2 and 3 with `kernel // 2`
    cells copied from the opposite edge before an unpadded convolution.

    Args:
        input_dim: number of input channels.
        output_dim: number of output channels.
        kernel_size: (height, width) of the convolution kernel.
        bn: whether to apply BatchNorm2d after the convolution.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        self.edge_size = (kernel_size[0] // 2, kernel_size[1] // 2)
        self.conv = nn.Conv2d(input_dim, output_dim, kernel_size=kernel_size)
        self.bn = nn.BatchNorm2d(output_dim) if bn else None

    @staticmethod
    def _wrap(t, edge, dim):
        """Extend `t` along `dim` with `edge` cells wrapped from each opposite end."""
        if edge == 0:
            # A slice written as [-0:] would select the whole axis, not nothing.
            return t
        size = t.size(dim)
        return torch.cat([t.narrow(dim, size - edge, edge), t, t.narrow(dim, 0, edge)], dim=dim)

    def forward(self, x):
        h = self._wrap(x, self.edge_size[1], 3)
        h = self._wrap(h, self.edge_size[0], 2)
        h = self.conv(h)
        h = self.bn(h) if self.bn is not None else h
        return h


class GeeseNet_t(nn.Module):
    """Torch policy/value network: 12 residual torus-conv blocks with 32 filters.

    `forward` returns a dict with 'policy' (4 unnormalised scores) and
    'value' (tanh-squashed scalar) per batch element.
    """

    def __init__(self):
        super().__init__()
        layers, filters = 12, 32
        self.conv0 = TorusConv2d_t(17, filters, (3, 3), True)
        self.blocks = nn.ModuleList([TorusConv2d_t(filters, filters, (3, 3), True) for _ in range(layers)])
        self.head_p = nn.Linear(filters, 4, bias=False)
        self.head_v = nn.Linear(filters * 2, 1, bias=False)

    def forward(self, x):
        h = F.relu_(self.conv0(x))
        for block in self.blocks:
            h = F.relu_(h + block(h))

        batch, channels = h.size(0), h.size(1)
        # Input channel 0 masks the features; the result is summed over the board.
        masked = h * x[:,:1]
        h_head = masked.view(batch, channels, -1).sum(-1)
        # Per-channel mean of the features over the whole board.
        h_avg = h.view(batch, channels, -1).mean(-1)

        policy = self.head_p(h_head)
        value = torch.tanh(self.head_v(torch.cat([h_head, h_avg], 1)))
        return {'policy': policy, 'value': value}

def make_input_t(obs, obs_prev):
    """Encode an observation as a (17, 7, 11) float32 array for the torch model.

    Planes 0-3 hold goose heads, 4-7 tails, 8-11 whole bodies and 12-15 the
    heads from `obs_prev`; within each group the plane offset is the seat
    relative to `obs['index']`. Plane 16 holds food. No centring is applied.

    Args:
        obs: current observation with 'geese', 'index' and 'food'.
        obs_prev: previous observation, or a falsy value when there is none.
    """
    planes = np.zeros((17, 7 * 11), dtype=np.float32)

    def rel(seat):
        # Seat offset relative to the observing goose.
        return (4 + seat - obs['index']) % 4

    for seat, goose in enumerate(obs['geese']):
        if len(goose) > 0:
            planes[0 + rel(seat), goose[0]] = 1
            planes[4 + rel(seat), goose[-1]] = 1
        for cell in goose:
            planes[8 + rel(seat), cell] = 1

    if obs_prev:
        for seat, goose in enumerate(obs_prev['geese']):
            if len(goose) > 0:
                planes[12 + rel(seat), goose[0]] = 1

    for cell in obs['food']:
        planes[16, cell] = 1

    return planes.reshape(-1, 7, 11)


# Decode the torch weights (base64 -> bz2 -> pickle) and load them into the
# network, then switch it to evaluation mode.
# NOTE(review): pickle.loads runs arbitrary code from the blob; trusted data only.
state_dict = pickle.loads(bz2.decompress(base64.b64decode(PARAM)))
model_t = GeeseNet_t()
model_t.load_state_dict(state_dict)
model_t.eval()

class TorchAgent:
    """Callable agent wrapping the torch policy net fed by `make_input_t`.

    Args:
        net: module whose output dict has a 'policy' entry of 4 scores.
        stochastic: when True, `__call__` returns the masked probability
            vector instead of an action name.
    """

    def __init__(self, net, stochastic = False):
        self.prev_obs = None
        self.net = net
        self.stochastic = stochastic

    def __call__(self, observation, configuration):
        """Return an action name, or the probabilities when `stochastic` is set."""
        if observation['step'] == 0:
            # New episode: no history and no move mask on the first step.
            self.prev_obs = None
            valids = 1
        else:
            valids = getValidMoves(observation, self.prev_obs, observation.index)
        x = make_input_t(observation, self.prev_obs)
        with torch.no_grad():
            xt = torch.from_numpy(x).unsqueeze(0)
            o = self.net(xt)
        logits = o['policy'].squeeze(0).detach().numpy()

        # Softmax with the maximum subtracted first: mathematically identical,
        # but exp() can no longer overflow to inf and produce NaN probabilities.
        exps = np.exp(logits - np.max(logits))
        probs = exps / np.sum(exps)
        # Zero out blocked moves; the epsilon keeps the normalisation finite.
        probs = probs * valids + 1e-14
        probs = probs / np.sum(probs)
        self.prev_obs =  observation

        if self.stochastic:
            return probs
        return ACTIONS[np.argmax(probs)]

class Geesamble_Agent:
    """Ensemble agent: weighted sum of the three member agents' probabilities.

    Every member is created in stochastic mode, so it returns its own
    (already move-masked) probability vector; `self.w` holds one weight
    per member, shaped (3, 1) so it broadcasts over the 4 actions.
    """

    def __init__(self):
        self.prev_obs = None
        self.agents = [TFAgent(model, True), TFAgent_step(model_step, True), TorchAgent(model_t, True)]
        self.w = np.array([[0.34], [0.33], [0.33]])

    def __call__(self, observation, configuration):
        """Return the action name with the highest weighted probability."""
        if observation['step'] == 0:
            self.prev_obs = None

        # The members mask blocked moves themselves, so the ensemble no longer
        # computes a move mask of its own (it was computed but never used).
        member_probs = np.array([ag(observation, None) for ag in self.agents])
        probs = np.sum(member_probs * self.w, axis = 0)
        self.prev_obs =  observation

        return ACTIONS[np.argmax(probs)]

# One module-level ensemble instance, so the member agents keep their
# previous-observation state between calls.
agent_ans = Geesamble_Agent()
# Entry point of the submission file: forwards each call to the ensemble.
# NOTE(review): presumably this must stay the last callable defined in the
# file for the competition runner to pick it up - confirm before reordering.
def agent(obs_dict, config_dict):

    return agent_ans(obs_dict, config_dict)

In [None]:
%%writefile -a submission_step.py
import pickle
import bz2
import base64
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, Conv2D, Activation, Lambda, Add, BatchNormalization, Input
from tensorflow.keras import backend as K
from tensorflow.keras.regularizers import l1_l2, l2
from kaggle_environments.envs.hungry_geese.hungry_geese import Action, translate


# Neural Network for Hungry Geese
def TorusConv2D(x, ch, kernel, padding="same", strides=1, weight_decay=2e-3):
    """Apply a Conv2D to `x` with wrap-around (torus) borders.

    The input is tiled 3x3 over axes 1 and 2, convolved, and then the
    central third of the result is cropped back out, so cells at one edge
    see the cells of the opposite edge as neighbours.

    Args:
        x: Keras tensor indexed as (batch, axis1, axis2, channels).
        ch: number of convolution filters.
        kernel: convolution kernel size.
        padding: forwarded to Conv2D.
        strides: forwarded to Conv2D.
        weight_decay: L2 factor for the convolution kernel regularizer.

    Returns:
        The cropped convolution output tensor.
    """
    def tile_3x3(t):
        return K.tile(t, n=(1,3,3,1))

    def tiled_shape(input_shape):
        return (None, 3*input_shape[1], 3*input_shape[2], input_shape[3])

    def crop_center(t):
        # Keep only the middle third along both tiled axes.
        rows = int(t.shape[1]/3)
        cols = int(t.shape[2]/3)
        return t[:, rows:2*rows, cols:2*cols, :]

    def cropped_shape(input_shape):
        return (None, int(input_shape[1]/3), int(input_shape[2]/3), input_shape[3])

    tiled = Lambda(tile_3x3, output_shape=tiled_shape)(x)
    convolved = Conv2D(ch, kernel, padding=padding, strides=strides,
                       kernel_regularizer=l2(weight_decay))(tiled)
    return Lambda(crop_center, output_shape=cropped_shape)(convolved)

def conv_bn_relu(x0, ch, kernel, padding="same", strides=1, weight_decay=2e-5, add=False):
    """Torus convolution -> batch normalization -> ReLU, optionally residual.

    Args:
        x0: input Keras tensor.
        ch, kernel, padding, strides, weight_decay: forwarded to TorusConv2D.
        add: when True, the block input `x0` is added to the activated output.

    Returns:
        The activated tensor, or `x0 + activated` when `add` is True.
    """
    # Layers are created in the same order as before (conv, BN, ReLU, Add).
    conv_out = TorusConv2D(x0, ch, kernel, padding=padding, strides=strides,
                           weight_decay=weight_decay)
    normed = BatchNormalization()(conv_out)
    activated = Activation("relu")(normed)
    if not add:
        return activated
    return Add()([x0, activated])

def GeeseNet(input_shape=(7, 11, 18), layers=12, filters=32, weight_decay=2e-3):
    """Build the Keras policy network used with the 18-plane input.

    The conv tower output is flattened and concatenated with the per-channel
    mean of the raw input before a 64-unit SELU layer and the softmax head.

    Args:
        input_shape: shape of one input sample (without the batch axis).
        layers: number of residual conv_bn_relu blocks after the first one.
        filters: number of filters in every convolution.
        weight_decay: L2 factor forwarded to every convolution block.
            Previously this argument was accepted but silently ignored.

    Returns:
        A Keras `Model` (not compiled) mapping the input to 4 softmax outputs.
    """
    # `inputs` rather than `input`, so the builtin is not shadowed.
    inputs = Input(input_shape)
    x = conv_bn_relu(inputs, filters, 3, weight_decay=weight_decay)

    for _ in range(layers):
        x = conv_bn_relu(x, filters, 3, weight_decay=weight_decay, add=True)

    x = Flatten()(x)
    # Mean of every input channel over the two board axes.
    y = tf.reduce_mean(inputs, axis=[1, 2])

    z = tf.keras.layers.concatenate([x,y])
    x = Dense(64, activation='selu')(z)
    output = Dense(4, activation='softmax', kernel_regularizer=l1_l2(l1=0.000005, l2=0.000005))(x)
    return Model(inputs, output)

# Input for Neural Network
def centerize(b):
    """Cyclically shift the board so the first set cell of plane 0 lands at (3, 5).

    Args:
        b: array indexed as (plane, row, col) with 7 rows and 11 columns;
            plane 0 must contain at least one non-zero cell.

    Returns:
        A new array of the same shape with rows and columns rotated.
    """
    head_rows, head_cols = np.where(b[0])
    # Output row i is read from input row (i - 3 + head_row) mod 7; same for columns.
    row_order = (np.arange(0, 7) - 3 + head_rows[0]) % 7
    col_order = (np.arange(0, 11) - 5 + head_cols[0]) % 11
    return np.take(np.take(b, row_order, axis=1), col_order, axis=2)

def make_input(obses):
    """Encode the latest observation as an (18, 7, 11) float32 array.

    Planes 0-3 hold goose heads, 4-7 tails, 8-11 whole bodies and 12-15 the
    heads from the second-to-last observation; within each group the plane
    offset is the seat relative to the observing goose. Plane 16 holds food
    and plane 17 is written from the step expression below. The board is
    centred with `centerize`; the plane axis stays first.

    Args:
        obses: list of observations, oldest first; the last entry is encoded.
    """
    planes = np.zeros((18, 7 * 11), dtype=np.float32)
    obs = obses[-1]

    if "step" in obs:
        # NOTE(review): zeros * (step / 200) is always zero, so plane 17 never
        # carries the step. Kept as-is because the shipped weights were
        # presumably trained with this exact encoding - confirm before changing.
        planes[17,:] = np.zeros((7 * 11), dtype=np.float32) * (obs['step'] / 200)

    def rel(seat):
        # Seat offset relative to the observing goose.
        return (4 + seat - obs['index']) % 4

    for seat, goose in enumerate(obs['geese']):
        if len(goose) > 0:
            planes[0 + rel(seat), goose[0]] = 1    # head
            planes[4 + rel(seat), goose[-1]] = 1   # tail tip
        for cell in goose:
            planes[8 + rel(seat), cell] = 1        # whole body

    if len(obses) > 1:
        for seat, goose in enumerate(obses[-2]['geese']):
            if len(goose) > 0:
                planes[12 + rel(seat), goose[0]] = 1

    for cell in obs['food']:
        planes[16, cell] = 1

    return centerize(planes.reshape(-1, 7, 11))

def getValidMoves(obs, last_obs, index):
    """Return a 4-element list of booleans marking non-blocked moves.

    The order is NORTH, SOUTH, WEST, EAST. A move is blocked when its target
    cell is occupied by any goose segment except that goose's last one, or
    when it is the head position of goose `index` in `last_obs`.

    Args:
        obs: current observation exposing `.geese`.
        last_obs: previous observation exposing `.geese`, or None.
        index: seat of the goose whose moves are checked.
    """
    all_geese = obs.geese
    head = all_geese[index][0]

    blocked = set()
    for goose in all_geese:
        # The final segment of each goose is not treated as an obstacle.
        blocked.update(goose[:-1])
    if last_obs is not None:
        blocked.add(last_obs.geese[index][0])

    moves = (Action.NORTH, Action.SOUTH, Action.WEST, Action.EAST)
    return [translate(head, move, 11, 7) not in blocked for move in moves]

# Load Keras Model
# Build the 18-plane network and install the pre-trained weights
# (base64 text -> bz2-compressed bytes -> pickled weight list).
# NOTE(review): pickle.loads runs arbitrary code from the blob; trusted data only.
model = GeeseNet(input_shape=(7, 11, 18), layers=12, filters=32, weight_decay=1e-7)
model.set_weights(pickle.loads(bz2.decompress(base64.b64decode(weight))))

# Observation history shared across calls to `agent`.
obses = []

def agent(obs_dict, config_dict):
    """Submission entry point: pick the best non-blocked move for this step.

    Args:
        obs_dict: current observation (dict-like, also exposing `.index`
            and `.geese` as attributes).
        config_dict: environment configuration (unused).

    Returns:
        One of 'NORTH', 'SOUTH', 'WEST', 'EAST'.
    """
    # A new episode starts at step 0: drop history left over from a previous
    # game so its last observation is not treated as the "previous" one.
    if "step" in obs_dict and obs_dict["step"] == 0:
        obses.clear()

    obses.append(obs_dict)
    if len(obses) < 2:
        # First call: duplicate the observation so obses[-2] exists.
        obses.append(obs_dict)
    # Only the last two observations are ever read; keep the list bounded.
    del obses[:-2]

    X_test = make_input(obses)
    X_test = np.transpose(X_test, (1,2,0))
    X_test = X_test.reshape(-1,7,11,18) # channel last.
    valids = getValidMoves(obses[-1], obses[-2], obses[-1].index)

    # Zero out the probabilities of blocked moves before taking the argmax.
    y_pred = model.predict(X_test) * valids

    actions = ['NORTH', 'SOUTH', 'WEST', 'EAST']
    return actions[np.argmax(y_pred)]

# Testing Geesamble agent

In [None]:
%%time
from tqdm.notebook import tqdm
from kaggle_environments import evaluate


def _placement_points(rewards):
    """Return per-seat placement points: n-1 for the best reward down to 0 for the worst.

    Each seat is ranked exactly once. Ties go to the lower seat index, and a
    missing reward (None) ranks last. The earlier approach of zeroing a
    picked reward and re-running argmax could award points to the same seat
    twice whenever the remaining rewards were all 0.
    """
    values = np.array([-np.inf if r is None else r for r in rewards], dtype=float)
    points = np.zeros(len(values))
    ranking = np.argsort(-values, kind="stable")
    for pts, seat in zip(range(len(values) - 1, 0, -1), ranking):
        points[seat] += pts
    return points


scores = np.zeros((4))
ag_ens = Geesamble_Agent()
ag_torch1 = TorchAgent(model_t)
ag_tf1 = TFAgent(model)
ag_tf_step = TFAgent_step(model_step)
for epoch in tqdm(range(100)):
    # Seat 0 is the plain TF agent; the other three seats run the ensemble file.
    result = evaluate("hungry_geese", [ag_tf1,'submissionans.py','submissionans.py','submissionans.py'])[0]
    # Placement points are 3/2/1/0; subtracting 1.5 centres them around zero.
    scores = scores + (_placement_points(result) - 1.5)
print("results :", scores)

In [None]:
from kaggle_environments import make
# Play one demonstration game between the two submission files and two
# in-notebook agents.
env = make("hungry_geese", debug=True)


env.reset()
# Seats: step-model file, fresh ensemble instance, ensemble file, plain TF agent.
env.run(['submission_step.py',Geesamble_Agent(),"submissionans.py",TFAgent(model)])
env.render(mode="ipython", width=600, height=500)