In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory and print its
# full path, one per line, in os.walk order.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from torch import optim
from torch.utils.data import DataLoader, Dataset
from statistics import mean
from torch import load, max as pt_max, ones, save, no_grad
from torch import nn
from torch.nn import functional as F
from torch.optim import Adam
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Here we train a net to classify between 4 classes:
# 0: 12
# 1: 16
# 2: 24
# 3: 32

# How do I get the board encoder?
# The trick is to make the net's forward method return not only the final
# feed-forward output, but also an intermediate layer, which is supposed to act as an encoder

# So, despite each board having a different size (and resource placement),
# the net produces a fixed-length output of 8 x 8

# No matter what board we feed into it, the net will
# translate it to a new latent space of 8 x 8


class DS(Dataset):
    """Index-aligned dataset that serves ``(map, label)`` pairs.

    ``maps`` and ``labels`` are stored as given; item ``idx`` is the pair
    ``(maps[idx], labels[idx])`` and the dataset length is ``len(labels)``.
    """

    def __init__(self, maps, labels) -> None:
        self.maps, self.labels = maps, labels

    def __len__(self):
        # The label container defines how many samples exist.
        return len(self.labels)

    def __getitem__(self, idx):
        return self.maps[idx], self.labels[idx]


class MAPS_NET(nn.Module):
    """Convolutional classifier that also exposes a 64-value latent encoding.

    The network applies one input convolution, then ``hidden_layers`` rounds of
    (conv -> dropout2d -> CELU -> 2x2 max-pool with stride 1), flattens the
    result, projects it to 64 features and finally to ``output_dim`` class
    log-probabilities.

    Args:
        input_size: Number of input channels.
        out_channels: Number of channels produced by every convolution.
        kernel_size: Square kernel size of every convolution (no padding).
        hidden_layers: Number of conv/dropout/CELU/pool rounds.
        output_dim: Number of classes.
        input_hw: Side length of the (square) input maps; used only to size
            the first linear layer. Defaults to 32.

    Raises:
        ValueError: If the convolution stack would shrink an ``input_hw``
            sized map to less than 1x1.
    """

    def __init__(
        self,
        input_size=2,
        out_channels=48,
        kernel_size=3,
        hidden_layers=9,
        output_dim=4,
        input_hw=32,
    ):
        super().__init__()
        self.out_channels = out_channels

        self.to_conv2d = nn.Conv2d(in_channels=input_size,
                                   out_channels=out_channels,
                                   kernel_size=kernel_size)
        self.conv2d = nn.Conv2d(in_channels=out_channels,
                                out_channels=out_channels,
                                kernel_size=kernel_size)

        # NOTE(review): the same module instance is repeated, so all hidden
        # rounds share one set of convolution weights. Kept as-is so that
        # behaviour and state_dict layout do not change; confirm it is intended.
        self.conv2ds = nn.ModuleList(
            [self.conv2d for _ in range(hidden_layers)])

        # Unpadded conv shrinks each side by (kernel_size - 1); the stride-1
        # 2x2 max-pool shrinks it by 1 more, i.e. kernel_size per hidden round.
        side = input_hw - (kernel_size - 1) - hidden_layers * kernel_size
        if side < 1:
            raise ValueError(
                'input_hw=%d is too small for kernel_size=%d and hidden_layers=%d'
                % (input_hw, kernel_size, hidden_layers))

        # Flattened feature count fed to the first linear layer
        # (48 * 3 * 3 = 432 with the default arguments).
        self.helper = out_channels * side * side

        self.to_linear = nn.Linear(self.helper, 64)
        self.output = nn.Linear(64, output_dim)

    def forward(self, x):
        """Run the network on a batch of maps.

        Args:
            x: Input batch; its first dimension is treated as the batch size
                and it is passed straight to the input ``Conv2d``.

        Returns:
            A tuple ``(encoding, log_probs)``: the 64-feature output of
            ``to_linear`` and the log-softmax class scores.
        """
        batch_size = x.shape[0]
        x = self.to_conv2d(x)

        for conv2d in self.conv2ds:
            x = conv2d(x)
            # F.dropout2d defaults to training=True; tie it to the module mode
            # so that net.eval() really disables dropout.
            x = F.dropout2d(x, p=0.4, training=self.training)
            x = F.max_pool2d(F.celu(x), kernel_size=2, stride=1)
        x = x.reshape(batch_size, -1)

        x = self.to_linear(x)
        res = F.log_softmax(self.output(x), dim=-1)

        return x, res

if __name__ == '__main__':
    # Data: map tensors and their class labels, loaded from the Kaggle dataset.
    maps = load('/kaggle/input/envdata4/env_maps.pth')
    print(maps.shape)
    labels = load('/kaggle/input/envdata4/env_maps_labels.pth')
    print(labels.shape)

    # HP
    BATCH_SIZE = 32
    LR = 0.00075
    EPOCHS = 5
    LOG_EVERY = 10  # print running statistics every LOG_EVERY batches

    # Net
    net = MAPS_NET().train().cuda()
    optimizer = Adam(net.parameters(), lr=LR)
    # The net emits log-probabilities, hence NLLLoss (mean-reduced by default).
    criterion = nn.NLLLoss()

    # Split
    train_X, test_X, train_y, test_y = train_test_split(maps, labels)

    # Train
    train_ds = DS(train_X, train_y)
    train_dl = DataLoader(train_ds,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          drop_last=True)
    for epoch in range(EPOCHS):
        running_loss = []
        correct = 0
        total = 0
        # `targets` (not `labels`) so the full label tensor is not overwritten.
        for i, (inputs, targets) in enumerate(train_dl):
            inputs = inputs.cuda()
            targets = targets.cuda()

            optimizer.zero_grad()
            encoder, outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Accumulate statistics on every batch; only printing is throttled.
            running_loss.append(loss.item())
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

            if i % LOG_EVERY == 0:
                print('[%d, %5d] Loss: %.3f' %
                      (epoch + 1, i + 1, mean(running_loss)),
                      end=' - ')
                print('Accuracy: %d %%' % (100 * correct / total))

    # Eval
    net = net.eval()
    test_ds = DS(test_X, test_y)
    # No shuffling and no dropped tail: every test sample is scored once.
    test_dl = DataLoader(test_ds,
                         batch_size=BATCH_SIZE,
                         shuffle=False,
                         drop_last=False)
    running_loss = []
    correct = 0
    total = 0
    with no_grad():
        for i, (inputs, targets) in enumerate(test_dl):
            inputs = inputs.cuda()
            targets = targets.cuda()
            encoder, outputs = net(inputs)

            # Compute the loss of THIS test batch (previously the last
            # training loss was re-appended here).
            loss = criterion(outputs, targets)
            running_loss.append(loss.item())
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

            if i % LOG_EVERY == 0:
                print('Test Loss: %.3f' % (mean(running_loss)), end=' - ')
                print('Test Accuracy: %d %%' % (100 * correct / total))

    # Final figures over the whole test split.
    if total:
        print('Test Loss: %.3f' % (mean(running_loss)), end=' - ')
        print('Test Accuracy: %d %%' % (100 * correct / total))

    # Persist
    #save(net.state_dict(), f'env_maps_net_{100 * correct / total:.1f}%.pth')

In [None]:
from torch import zeros, stack, tensor

import matplotlib.pyplot as plt

from kaggle_environments import make

# Helper code that extracts the initial board from a Lux AI environment game

class ENV_DATA:
    """Creates a "lux_ai_2021" environment and turns its initial observation
    into two 32x32 resource maps (presence and type)."""

    def __init__(self) -> None:
        pass

    def draw_board(self):
        """Create and return a new "lux_ai_2021" environment."""
        return make("lux_ai_2021",
                    configuration={
                        "loglevel": 1,
                        "annotations": True
                    },
                    debug=True)

    def get_init_obs(self, board):
        """Return the observation produced by resetting a trainer on `board`.

        The trainer is created with `[None, 'simple_agent']` as the agent list.
        """
        trainer = board.train([None, 'simple_agent'])
        return trainer.reset()

    def resource_type_to_num(self, r_type):
        """Map a resource name ('wood', 'coal', 'uranium') to 1, 2 or 3.

        Raises:
            KeyError: If `r_type` is not one of the three known names.
        """
        dic = {'wood': 1, 'coal': 2, 'uranium': 3}
        return dic[r_type]

    def create_entity_maps(self, obs):
        """Build the resource maps from an observation dict.

        Args:
            obs: Mapping with a 'height' entry and an 'updates' list of
                space-separated strings. Entries whose first token is 'r' are
                read as `r <type> <x> <y> ...`; all others are ignored.

        Returns:
            `(wh, maps)` where `wh` is `obs['height']` and `maps` is a list of
            two 32x32 tensors: `maps[0][x, y]` is 1 where a resource exists and
            `maps[1][x, y]` holds its numeric type.
        """
        wh = obs['height']

        # Always 32x32 regardless of `wh`; untouched cells stay zero.
        maps = [zeros((32, 32)) for _ in range(2)]

        for entity in obs['updates']:
            strs = entity.split(' ')
            if strs[0] != 'r':
                continue

            r_type = strs[1]
            x = int(strs[2])
            y = int(strs[3])
            # The resource amount (strs[4]) is deliberately not encoded.
            maps[0][x, y] = 1
            maps[1][x, y] = self.resource_type_to_num(r_type)

        return wh, maps

    def wh_to_y(self, wh):
        """Map a board size (12, 16, 24 or 32) to its class index 0-3.

        Raises:
            KeyError: If `wh` is not one of the four known sizes.
        """
        d = {12: 0, 16: 1, 24: 2, 32: 3}
        return d[wh]

    def get_maps(self):
        """Create a new board and return `(stacked_maps, wh_tensor)`.

        `stacked_maps` is the two maps stacked into one tensor and `wh_tensor`
        is the raw `obs['height']` value wrapped in a tensor (not the class
        index from `wh_to_y`).
        """
        board = self.draw_board()
        init_obs = self.get_init_obs(board)
        wh, maps = self.create_entity_maps(init_obs)
        return stack(maps), tensor(wh)

    def get_latent(self):
        """Return `(wh_tensor, stacked_maps)`, i.e. `get_maps()` reversed."""
        maps, wh = self.get_maps()
        return wh, maps

In [None]:
# Display the shape of `maps`, the tensor loaded from env_maps.pth in the training cell
maps.shape

In [None]:
# Re-run this cell to draw a new board each time and compare the raw
# resource maps with the encoding the trained net produces for them.

env_data = ENV_DATA()

fig, ax = plt.subplots(1, 3, figsize=(12,8), constrained_layout=True)
ax = ax.ravel()

wh, maps_ = env_data.get_latent()

# The first output of the net is the 64-value latent, shown here as an 8x8 image.
batch = maps_.unsqueeze(0).cuda()
encoded = net(batch)[0].reshape(8,8)
encoded = encoded.detach().cpu().numpy()

panels = (
    (maps_[0], 'Resources Position'),
    (maps_[1], 'Resources Types'),
    (encoded, 'Net Representation (+97%)'),
)
for axis, (image, title) in zip(ax, panels):
    axis.imshow(image)
    axis.set_title(title)

In [None]:
# WHY DID I DO ALL THIS?

# My thinking is that, when you do RL, you are doing many things at once
# I feel the net gets too much noise

# For one, it's trying to learn the input representation
# all the way up to, e.g., the action distribution.
# Also, it's trying to discriminate between good and bad actions.
# All with the same loss value...

# I thought that by fitting this representation beforehand,
# the net used in RL would get better inputs and therefore learn better,
# at least for the latent maps

# TODO
# Maybe do not feed into this net the initial resources amounts, and re-fit it,
# because, as resources amounts will be changing in game due to units gathering
# The net may have a hard time encoding such never-before-seen scenarios
# thus giving misleading representations...
# ### DONE!