In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under /kaggle/input.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Upgrade kaggle-environments to the newest available release (-U).
# NOTE(review): the version is not pinned, so re-running later may pull a different release.
!pip install kaggle-environments -U
# Recursively copy everything from the lux-ai-2021 input dataset into the working directory.
# Presumably this is where the `lux` package imported by my_agent.py comes from - TODO confirm.
!cp -r ../input/lux-ai-2021/* .

In [None]:
%%writefile my_agent.py

# Switch for the `while TRAIN:` training loop at the bottom of this file.
# While False the loop body never runs; the loop itself sets the flag back to
# False once its stop condition is met.
TRAIN = False

from itertools import chain, repeat
from statistics import mean, stdev, median, mode
# from time import monotonic
# from pprint import pprint
# from random import randint, random

from kaggle_environments import make

#from net import NN

# from lux import annotate
# from lux.constants import Constants
from lux.game import Game
# from lux.game_constants import GAME_CONSTANTS
# from lux.game_map import RESOURCE_TYPES, Cell, Position
#
# import log  # no qa
# from optuna import create_study, exceptions

# import logging as lg
from itertools import repeat
#
import torch.nn as nn
from torch.nn import functional as f
from torch import as_tensor, cat, argmax, ones, tensor, mean as pt_mean, stack, log, zeros, no_grad, load, save
from torch.optim import Adam
from torch.distributions import Categorical


# !pip install kaggle-environments -U > /dev/null 2>&1
# !cp -r ../input/lux-ai-2021/* .

class NN(nn.Module):
    """Convolutional policy network with one head per entity kind.

    A stem convolution is followed by ``hidden_layers`` rounds of
    convolution + SELU + max-pooling (pool kernel 2, stride 1). The flattened
    features feed two linear heads: one for worker actions and one for
    city-tile actions.

    Args:
        input_size: number of input channels (feature maps).
        out_channels: channels produced by every convolution.
        kernel_size: convolution kernel size (int, or an ``(h, w)`` pair).
        hidden_layers: how many times the hidden convolution is applied.
        w_output_dim: number of worker actions.
        ct_output_dim: number of city-tile actions.
        kt_output_dim: size of the ``kt`` head (built but not used by
            ``forward``).
        input_hw: spatial size of the input maps (int, or an ``(h, w)``
            pair). The default of 31 reproduces the previously hard-coded
            flatten size of 3200 with the other defaults.

    Raises:
        ValueError: if the chosen sizes shrink the feature map to nothing.
    """

    def __init__(self,
                 input_size=9,
                 out_channels=32,
                 kernel_size=2,
                 hidden_layers=10,
                 w_output_dim=7,
                 ct_output_dim=2,
                 kt_output_dim=2,
                 input_hw=31,
                 ):
        super().__init__()
        self.out_channels = out_channels
        self.w_output_dim = w_output_dim
        self.ct_output_dim = ct_output_dim
        self.kt_output_dim = kt_output_dim

        self.to_conv2d = nn.Conv2d(in_channels=input_size,
                                   out_channels=out_channels,
                                   kernel_size=kernel_size)
        self.conv2d = nn.Conv2d(in_channels=out_channels,
                                out_channels=out_channels,
                                kernel_size=kernel_size)

        # NOTE(review): the very same Conv2d instance is listed
        # ``hidden_layers`` times, so every hidden layer shares one set of
        # weights. Kept as-is so existing checkpoints keep their meaning;
        # confirm whether independent layers were intended.
        self.conv2ds = nn.ModuleList(
            [self.conv2d for _ in range(hidden_layers)])

        # Flattened feature size, derived from the geometry instead of being
        # hard-coded. No padding is used, so per axis:
        #   stem conv:           size - (k - 1)
        #   each hidden round:   conv -(k - 1), then pool(2, stride 1) -1
        k_h, k_w = self._as_pair(kernel_size)
        in_h, in_w = self._as_pair(input_hw)
        out_h = in_h - (k_h - 1) - hidden_layers * k_h
        out_w = in_w - (k_w - 1) - hidden_layers * k_w
        if out_h < 1 or out_w < 1:
            raise ValueError(
                f'input_hw={input_hw!r} is too small for kernel_size='
                f'{kernel_size!r} with hidden_layers={hidden_layers}')
        self.helper = out_channels * out_h * out_w

        self.to_w = nn.Linear(self.helper, 64)
        self.w_output = nn.Linear(64, self.w_output_dim)

        self.to_ct = nn.Linear(self.helper, 64)
        self.ct_output = nn.Linear(64, self.ct_output_dim)

        # The kt head is created (and therefore part of the state dict) but
        # forward() never calls it.
        self.to_kt = nn.Linear(self.helper, 64)
        self.kt_output = nn.Linear(64, self.kt_output_dim)

    @staticmethod
    def _as_pair(value):
        """Return ``value`` as an ``(h, w)`` pair; an int applies to both axes."""
        if isinstance(value, int):
            return value, value
        height, width = value
        return height, width

    def forward(self, x):
        """Compute action probabilities and sample one action per head.

        Args:
            x: tensor of shape ``(batch, input_size, H, W)``.

        Returns:
            ``(w_probs, w_action, ct_probs, ct_action)``. The probabilities
            are softmax outputs with singleton dimensions squeezed away (so a
            batch of one yields 1-D probabilities and 0-D actions). The
            actions are freshly sampled from those distributions on every
            call.
        """
        step_size = x.shape[0]
        x = f.selu(self.to_conv2d(x))

        for conv2d in self.conv2ds:
            x = f.max_pool2d(f.selu(conv2d(x)), kernel_size=2, stride=1)
        # Flatten (channels, H, W) into a single feature vector per sample.
        x = x.reshape(step_size, x.shape[1] * x.shape[2] * x.shape[3])

        # Worker head.
        x_w = self.to_w(x)
        logits = self.w_output(x_w).reshape(step_size, -1)
        w_probs = f.softmax(logits, dim=-1).squeeze()
        w_action = Categorical(w_probs).sample()

        # City-tile head.
        x_ct = self.to_ct(x)
        logits = self.ct_output(x_ct).reshape(step_size, -1)
        ct_probs = f.softmax(logits, dim=-1).squeeze()
        ct_action = Categorical(ct_probs).sample()

        return w_probs, w_action, ct_probs, ct_action


### HP (hyperparameters) ###
# Half-width of the square window cropped around each entity; the crop spans
# (2 * LOCALITY + 1) cells per side (see create_local_entity_maps).
LOCALITY = 15
# Number of environment steps between optimizer updates in the training loop.
# 40 is a divisor of 360, the step count at which my_agent reports the board
# as complete - presumably chosen so updates line up with the game length.
TRAIN_EVERY = 40
# Learning rate handed to the Adam optimizer.
LR = 0.0001
# Quantile of the discounted returns that calc_qvals subtracts as a baseline.
Q_BASE = 0.4
# Per-step discount factor used when accumulating returns in calc_qvals.
DISC_FACTOR = 0.9999
##########


def id_day_night(step):
    """Map a step number onto a phase id within a repeating 40-step cycle.

    Returns 1 for the first 30 steps of each cycle and 2 for the remaining
    10 (going by the name, presumably day and night respectively).
    """
    phase = step % 40
    return 1 if 0 <= phase < 30 else 2


def create_entity_maps(obs):
    """Rasterise the observation's update strings into nine 32x32 feature maps.

    Every entry of ``obs['updates']`` is a space-separated record whose first
    token identifies its kind. Records of other kinds are ignored.

    Channel layout (all maps are indexed ``[x, y]``):
        0: 1 where a resource cell is present
        1: resource amount / 800
        2: unit presence, +1 for team 0 and -1 otherwise
        3: (wood + coal + uranium + 1) / 100, signed by team
        4: unit cooldown, signed by team
        5: city-tile presence, signed by team
        6: city-tile cooldown, signed by team
        7: road level taken from ``ccd`` records
        8: constant map holding ``id_day_night(obs['step'])``

    Cooldown and road-level fields are parsed with ``float`` so that
    fractional values (e.g. ``5.5``) do not raise ``ValueError``.
    NOTE(review): the official kit appears to parse these fields as floats
    as well - confirm against the engine's output format.

    Args:
        obs: mapping with an ``'updates'`` list of strings and a ``'step'``
            number.

    Returns:
        A list of nine ``(32, 32)`` tensors.
    """
    maps = [zeros((32, 32)) for _ in range(9)]

    for entity in obs['updates']:
        strs = entity.split(' ')
        input_identifier = strs[0]

        if input_identifier == 'r':
            # strs[1] is the resource type; it is not encoded in the maps.
            x = int(strs[2])
            y = int(strs[3])
            amt = int(float(strs[4]))
            maps[0][x, y] = 1
            maps[1][x, y] = amt / 800

        elif input_identifier == 'u':
            team = 1 if int(strs[2]) == 0 else -1
            x = int(strs[4])
            y = int(strs[5])
            cooldown = float(strs[6])
            wood = int(strs[7])
            coal = int(strs[8])
            uranium = int(strs[9])
            maps[2][x, y] = 1 * team
            maps[3][x, y] = (wood + coal + uranium + 1) / 100 * team
            maps[4][x, y] = cooldown * team

        elif input_identifier == 'ct':
            team = 1 if int(strs[1]) == 0 else -1
            x = int(strs[3])
            y = int(strs[4])
            cooldown = float(strs[5])
            maps[5][x, y] = 1 * team
            maps[6][x, y] = cooldown * team

        #####
        # TODO Add city info
        #####
        # elif input_identifier == 'c':
        #     team = 1 if int(strs[1]) == 0 else -1
        #     fuel = int(strs[3])
        #     lightkeepup = int(strs[4])
        #     maps[7][:, :] = fuel / 100
        #     maps[8][:, :] = lightkeepup / 100

        elif input_identifier == 'ccd':
            x = int(strs[1])
            y = int(strs[2])
            level = float(strs[3])
            maps[7][x, y] = level

    # Additional maps: the whole last channel carries the day/night phase id.
    maps[8][:, :] = id_day_night(obs['step'])

    return maps


def create_local_entity_maps(unit,
                             maps,
                             pad_amt=LOCALITY + 1,
                             locality=LOCALITY):
    """Crop a square window centred on ``unit`` out of every feature map.

    Each map is zero-padded by ``pad_amt`` cells on all sides before the
    crop, so windows that reach past the map edge are filled with zeros.

    Args:
        unit: any object exposing ``pos.x`` and ``pos.y``.
        maps: iterable of 2-D tensors indexed ``[x, y]``.
        pad_amt: zero padding added on every side before cropping.
        locality: half-width of the window; the crop is
            ``2 * locality + 1`` cells per side.

    Returns:
        A list with one cropped tensor per input map.
    """
    # Entity position expressed in the padded coordinate system.
    centre_x = unit.pos.x + pad_amt
    centre_y = unit.pos.y + pad_amt

    window_x = slice(centre_x - locality, centre_x + locality + 1)
    window_y = slice(centre_y - locality, centre_y + locality + 1)

    zero_pad = nn.ConstantPad2d(pad_amt, 0)
    return [zero_pad(layer)[window_x, window_y] for layer in maps]


def make_board_inputs(maps):
    """Stack the feature maps into one tensor and prepend a batch axis of size 1."""
    return stack(maps).unsqueeze(0)


def buffer_data():
    """Generator-based buffer: collect every value sent in, yield the buffer back.

    Prime it with ``next()``; afterwards each ``send(value)`` appends ``value``
    (including ``None``) and returns the same, growing list.
    """
    collected = []
    while True:
        collected.append((yield collected))


def calc_qvals(rewards, disc_factor=DISC_FACTOR, q_baseline=Q_BASE):
    """Turn per-step rewards into baseline-adjusted discounted returns.

    The return at each step is its reward plus ``disc_factor`` times the
    return of the following step. The ``q_baseline`` quantile of all returns
    is then subtracted from each of them.

    Args:
        rewards: sequence of per-step rewards, oldest first.
        disc_factor: discount applied per step.
        q_baseline: quantile (0..1) of the returns used as baseline.

    Returns:
        A list of floats, one per reward, in the same order as ``rewards``.
    """
    running_return = 0.0
    returns = []
    # Walk backwards so each step can reuse the return of its successor.
    for step_reward in reversed(rewards):
        running_return = running_return * disc_factor + step_reward
        returns.append(running_return)
    returns.reverse()

    returns_t = tensor(returns)
    # w = (w - w.mean()) / (w.std() + 0.1)
    return (returns_t - returns_t.quantile(q_baseline)).tolist()


def raw_action_to_unit_action(entity_id, entity, raw_action):
    """Translate a sampled action index into the entity's action command.

    Args:
        entity_id: ``'u'`` for a unit or ``'ct'`` for a city tile.
        entity: object providing the action methods used below
            (``move``/``build_city``/``pillage`` for units,
            ``research``/``build_worker`` for city tiles).
        raw_action: action index; anything that compares equal to the
            integer codes works (plain ints or single-element tensors).

    Returns:
        Whatever the selected entity method returns, or ``None`` when
        ``entity_id`` is neither ``'u'`` nor ``'ct'``.

    Raises:
        ValueError: if ``raw_action`` matches no code for the given entity
            kind (previously this surfaced as an ``UnboundLocalError``).
    """
    # Each entry pairs an action code with a thunk so that only the selected
    # entity method is ever invoked.
    if entity_id == 'u':
        choices = (
            (0, lambda: entity.move('n')),
            (1, lambda: entity.move('w')),
            (2, lambda: entity.move('s')),
            (3, lambda: entity.move('e')),
            (4, lambda: entity.move('c')),
            (5, lambda: entity.build_city()),
            (6, lambda: entity.pillage()),
        )
    elif entity_id == 'ct':
        choices = (
            (0, lambda: entity.research()),
            (1, lambda: entity.build_worker()),
        )
    else:
        # Unknown entity kinds yield no action, as before.
        return None

    for code, make_action in choices:
        if raw_action == code:
            return make_action()

    raise ValueError(
        f'unsupported raw_action {raw_action!r} for entity_id {entity_id!r}')


# Imported here because the file's import list only pulls selected names
# out of torch and does not expose torch.cuda.
from torch import cuda

game_state = None

# Prefer the GPU when one is present, otherwise fall back to the CPU so the
# file can still be loaded on machines without CUDA.
DEVICE = 'cuda' if cuda.is_available() else 'cpu'

net = NN().to(DEVICE)
print('=> Net created')
try:
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    sd = load('/kaggle/input/net-model/net_state_dict.pth',
              map_location=DEVICE)
    net.load_state_dict(sd)
    print('=> Net loaded from checkpoint')
except Exception as e:
    # Best effort: a missing or incompatible checkpoint leaves the freshly
    # initialised network in place. KeyboardInterrupt/SystemExit propagate.
    print('=> Error =', e)

optimizer = Adam(net.parameters(), lr=LR)  # , lr=LR Adam AdamW Adamax

# State shared between my_agent and the training loop; my_agent overwrites
# these on every call.
board_inputs = None
pseudo_rews = None
cts = None
units = None


def my_agent(observation, configuration=None):
    """Choose one action for every unit and city tile of this player.

    The game state is (re)built from ``observation["updates"]``. For each
    entity a local crop of the feature maps is fed to the network, and the
    sampled action of the matching head is converted into a command.

    Besides returning the commands, the call refreshes these module globals,
    which the training loop reads:
        board_inputs: network input of the last entity processed (left
            untouched when the player has no entities)
        pseudo_rews: this turn's pseudo reward
        cts: number of city tiles owned
    ``units`` is left holding this turn's list of units.

    Inputs are moved to whatever device the network's parameters live on, so
    the agent works on both GPU and CPU-only hosts.

    Args:
        observation: provides ``["step"]``, ``["updates"]`` and ``.player``.
        configuration: unused.

    Returns:
        A list with one action command per entity.
    """
    global game_state
    global net
    global board_inputs
    global pseudo_rews
    global cts
    global units

    ### Do not edit ###
    if observation["step"] == 0:
        game_state = Game()
        game_state._initialize(observation["updates"])
        game_state._update(observation["updates"][2:])
        game_state.id = observation.player

        cts = 0
        # units = 1
    else:
        game_state._update(observation["updates"])
    player = game_state.players[observation.player]

    # Prep
    maps = create_entity_maps(observation)

    units = list(player.units)
    citytiles = [ct for c in player.cities.values() for ct in c.citytiles]

    # Tag every entity with its kind so one loop can serve both heads.
    entities = chain(zip(repeat('u', len(units)), units),
                     zip(repeat('ct', len(citytiles)), citytiles))

    # Follow the network's device instead of assuming CUDA.
    net_device = next(net.parameters()).device

    # Actions
    actions = []
    for entity_id, entity in entities:
        maps_ = create_local_entity_maps(entity, maps)
        board_inputs = make_board_inputs(maps_).to(net_device)
        with no_grad():
            # worker head: move, pillage, build-ct
            # city-tile head: research, build-worker
            _, w_action, _, ct_action = net(board_inputs)
        if entity_id == 'u':
            #    if entity.can_act():
            actions.append(
                raw_action_to_unit_action(entity_id, entity, w_action))
        elif entity_id == 'ct':
            actions.append(
                raw_action_to_unit_action(entity_id, entity, ct_action))

    cts = len(citytiles)

    # Rewards: per-city fuel / 10 (zero or missing values count as 1),
    # summed and scaled by the number of city tiles.
    pseudo_rews = [c.fuel/10 for c in list(player.cities.values())]
    pseudo_rews = pseudo_rews if (len(pseudo_rews) > 0) else [1]
    pseudo_rews = [elem if elem != 0 else 1 for elem in pseudo_rews]
    # a = 2
    # # Generalized Mean
    # pseudo_rews = (sum([ elem ** a for elem in pseudo_rews ]) / len(pseudo_rews)) ** (1/a)
    pseudo_rews = sum(pseudo_rews) * cts

    # Report
    print(cts, end=' ')
    print(len(player.units), end=' ')
    print(f'{pseudo_rews:.0f}', end=' - ')

    # Complete
    if (observation['step'] + 1) == 360:
        print('===> Board Ended Complete')

    return actions


# Training driver. Runs only while TRAIN is True; each pass of the outer loop
# plays one game against 'simple_agent' and updates the network every
# TRAIN_EVERY steps.
#
# NOTE(review): keep this as plain module-level code. If the agent file is
# loaded by picking the last callable defined in it (as kaggle-environments
# appears to do - TODO confirm), wrapping this in a function would make that
# function the "agent".
#
# NOTE(review): at update time the network is run again on the buffered
# inputs and the log-probabilities are taken for the actions sampled in THAT
# fresh forward pass, not for the actions that were actually played. The
# buffer also only holds the input of the last entity processed each step.
# Both look unintended for a policy-gradient update; fixing them requires
# storing the played actions alongside the inputs.
rews = []
games = 1
while TRAIN:
    # for game in range(1, N_GAMES + 1):
    # Tensors built here follow the network's device instead of assuming CUDA.
    train_device = next(net.parameters()).device

    # buffer
    buff = buffer_data()
    next(buff)

    # ENV
    env = make("lux_ai_2021",
                configuration={
                    "loglevel": 1,
                    "annotations": True
                },
                debug=True)

    # ENV TRAINER
    trainer = env.train([None, 'simple_agent'])
    obs, done = trainer.reset(), False
    steps = 1

    # LOOP
    print('=> New board')
    while not done:
        actions = my_agent(obs)
        obs, reward, done, info = trainer.step(actions)

        # my_agent leaves its latest input / reward / city-tile count in
        # these module globals.
        buff.send((board_inputs, pseudo_rews, cts))

        if steps % TRAIN_EVERY == 0:
            # send(None) appends a None entry and returns the buffer; the
            # slice drops that trailing None again.
            data = buff.send(None)[:-1]
            board_inputs_ = [board_inputs_ for board_inputs_, _, _ in data]
            board_inputs_ = stack(board_inputs_).squeeze(1).to(train_device)
            w_probs, w_action, ct_probs, ct_action = net(board_inputs_)

            # Probability of the sampled action for every buffered step.
            w_probs = w_probs[range(len(w_probs)), w_action]
            log_w_probs = log(w_probs)

            ct_probs = ct_probs[range(len(ct_probs)), ct_action]
            log_ct_probs = log(ct_probs)

            pseudo_rews_ = [pseudo_rews_ for _, pseudo_rews_, _ in data]
            rews.append(mean(pseudo_rews_))
            qvals = calc_qvals(pseudo_rews_)
            qvals = tensor(qvals).to(train_device)

            # Average both heads' losses per step, then over the batch.
            w_loss = -(log_w_probs * qvals)
            ct_loss = -(log_ct_probs * qvals)
            loss = stack((w_loss, ct_loss)).mean(dim=0)
            loss = loss.mean()
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), 1)
            optimizer.step()

            print(f'Last Rewards: {mean(rews[-100:]):.2f}')

            # Reset buff
            buff = buffer_data()
            next(buff)
            board_inputs_ = None
            pseudo_rews_ = None

            # Stop condition; only evaluated on update steps.
            if (mean(rews[-100:]) > 750) or (games >= 500):
                print()
                print('  ===>  Saving model')
                save(net.state_dict(), 'net_state_dict.pth')
                done = True
                TRAIN = False

        steps += 1
    games += 1

In [None]:
from kaggle_environments import make

# Play one full game of the written agent against itself and render the replay.
env_configuration = {
    "loglevel": 1,
    "annotations": True,
}
env = make("lux_ai_2021", configuration=env_configuration, debug=True)
a_run = env.run(['my_agent.py', 'my_agent.py'])
env.render(mode="ipython", width=1100, height=500)

In [None]:
# Bundle everything in the working directory (the `*` glob) into a gzipped
# tarball. At this point that includes my_agent.py and the files copied from
# ../input/lux-ai-2021 by the setup cell.
# NOTE(review): on a re-run the glob also matches the previous
# submission.tar.gz; confirm the archive contains the entry point the
# competition expects.
!tar -czf submission.tar.gz *

# How would you enhance this agent? Feel free to fork the notebook and experiment.