In [None]:
# This notebook is based on the notebook 'Lux AI with imitation learning' by Sazuma (https://www.kaggle.com/shoheiazuma/lux-ai-with-imitation-learning)
# and has been modified to support learning and executing city actions
# This notebook is intended to be used on Google Colab with the replay data stored in Google drive
!pip install kaggle-environments -U > /dev/null 2>&1

In [None]:
# Mount the dataset from Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import json
from pathlib import Path
import os
import random
from tqdm.notebook import tqdm
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [None]:
def seed_everything(seed_value):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and, when present,
    all CUDA devices) and exports ``PYTHONHASHSEED``.

    Args:
        seed_value: Integer seed shared by all generators.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        print("GPU available")
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        # benchmark mode lets cuDNN auto-tune (and vary) its convolution
        # algorithms between runs, which defeats the deterministic flag above.
        torch.backends.cudnn.benchmark = False

seed = 42
seed_everything(seed)

GPU available


# Preprocessing

In [None]:
# City Tile Actions
def to_label_ct(action):
    """Translate a city-tile action string into ``(pos_x, pos_y, label)``.

    Labels: ``'r'`` (research) -> 0, ``'bw'`` (build worker) -> 1,
    ``'bc'`` (build cart) -> 2. Any other command yields ``('', '', None)``.
    The coordinates are returned as the raw string tokens of the action.
    """
    tokens = action.split(' ')
    label = {'r': 0, 'bw': 1, 'bc': 2}.get(tokens[0])
    if label is None:
        # Not a city-tile command.
        return '', '', None
    return tokens[1], tokens[2], label

def to_label(action):
    """Translate a unit action string into ``(unit_id, label)``.

    Labels: move north/south/west/east -> 0/1/2/3, ``'bcity'`` -> 4,
    ``'p'`` -> 5. A centre move (``'m <id> c'``), a transfer (``'t'``) and
    every other command give ``None`` as the label. The second token is
    always returned as ``unit_id``, whatever the command is.
    """
    tokens = action.split(' ')
    verb, unit_id = tokens[0], tokens[1]

    if verb == 'm':
        direction_label = {'c': None, 'n': 0, 's': 1, 'w': 2, 'e': 3}
        return unit_id, direction_label[tokens[2]]

    # Transfer actions ('t') carry a destination, a resource type and an
    # amount; they are not learned yet and fall through to None.
    # TODO: handle transfers
    return unit_id, {'bcity': 4, 'p': 5}.get(verb)


def depleted_resources(obs):
    """Return True when no update line in ``obs['updates']`` starts with the token ``'r'``."""
    return not any(line.split(' ')[0] == 'r' for line in obs['updates'])


def create_dataset_from_json(episode_dir, team_name=None):
    """Build imitation-learning samples from the replay JSON files in a directory.

    For each replay the player with the highest reward is imitated. Every step
    in which that player is ``ACTIVE`` contributes one trimmed observation plus
    one sample per unit action and one per city-tile action taken on the
    following step. Processing of a replay stops at the first observation that
    has no resource updates left.

    Args:
        episode_dir: Directory holding ``*.json`` replays; files whose name
            contains ``'output'`` or ``'info'`` are ignored.
        team_name: If given, replays whose winning team has a different name
            are skipped.

    Returns:
        ``(obses, samples, obses_ct, samples_ct)`` where ``obses`` and
        ``obses_ct`` map ``'<episode id>_<step>'`` to the trimmed observation,
        ``samples`` holds ``(obs_id, unit_id, label)`` tuples and
        ``samples_ct`` holds ``(obs_id, pos_x, pos_y, label)`` tuples.
    """
    obses = {}
    obses_ct = {}
    samples = []
    samples_ct = []
    kept_keys = ('step', 'updates', 'player', 'width', 'height')

    episodes = [path for path in Path(episode_dir).glob('*.json')
                if 'output' not in path.name and 'info' not in path.name]
    # tqdm already reports progress; printing every file path only floods the cell output.
    for filepath in tqdm(episodes):
        with open(filepath) as f:
            json_load = json.load(f)

        ep_id = json_load['info']['EpisodeId']
        # Rewards may be None; treat those as 0 when picking the winner.
        index = np.argmax([r or 0 for r in json_load['rewards']])
        if team_name is not None and json_load['info']['TeamNames'][index] != team_name:
            continue

        steps = json_load['steps']
        for i in range(len(steps) - 1):
            if steps[i][index]['status'] != 'ACTIVE':
                continue

            # The actions recorded on step i+1 are the response to observation i.
            # Guard against a missing (None) action list so one bad step does
            # not abort the whole dataset build.
            actions = steps[i + 1][index]['action'] or []
            # The observation is read from the first agent's entry for both players.
            obs = steps[i][0]['observation']

            if depleted_resources(obs):
                break

            obs['player'] = index
            obs = {k: v for k, v in obs.items() if k in kept_keys}
            obs_id = f'{ep_id}_{i}'
            obses[obs_id] = obs
            obses_ct[obs_id] = obs

            for action in actions:
                unit_id, label = to_label(action)
                pos_x, pos_y, label_ct = to_label_ct(action)
                if label is not None:
                    samples.append((obs_id, unit_id, label))
                if label_ct is not None:
                    samples_ct.append((obs_id, pos_x, pos_y, label_ct))

    return obses, samples, obses_ct, samples_ct

In [None]:
# Replay directory on the mounted Google Drive (relative to /content).
episode_dir = './drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021'
# Parse every replay once; the unit and city-tile sample lists share the same observations.
obses, samples, obses_ct, samples_ct = create_dataset_from_json(episode_dir)
print('obses:', len(obses), 'samples:', len(samples), 'obses_ct:', len(obses_ct), 'samples_ct:', len(samples_ct))

  0%|          | 0/391 [00:00<?, ?it/s]

drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26688997.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26689645.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26689435.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26689787.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26689505.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26689365.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26689575.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26689999.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26689929.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26689717.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26689859.json
drive/Shareddrives/CSCI 527 Fall 2021/training_data/replay_10_4_2021/26690350.json
driv

In [None]:
def _print_label_counts(label_values, label_names):
    """Print one '<name>: <count>' line per distinct label value."""
    distinct, counts = np.unique(label_values, return_counts=True)
    for value, count in zip(distinct, counts):
        print(f'{label_names[value]:^5}: {count:>3}')


# Unit actions: the label is the last field of every sample tuple.
labels = [sample[-1] for sample in samples]
actions = ['north', 'south', 'west', 'east', 'bcity', 'p']
_print_label_counts(labels, actions)

print()
# City-tile actions.
labels_ct = [sample_ct[-1] for sample_ct in samples_ct]
actions_ct = ['research', 'build worker', 'build cart']
_print_label_counts(labels_ct, actions_ct)

north: 138207
south: 131639
west : 142865
east : 148972
bcity: 40812

research: 67645
build worker: 28701


# Training Unit Actions

In [None]:
# Input for Neural Network
def make_input(obs, unit_id):
    """Encode an observation as a (20, 32, 32) float32 tensor for one unit.

    The map is centred inside the 32x32 grid. Plane layout:
        0-1    the unit ``unit_id``: presence, cargo / 100
        2-4    other own-team units: presence, cooldown / 6, cargo / 100
        5-7    opponent units (same three values)
        8-9    own city tiles: presence, city fuel ratio
        10-11  opponent city tiles
        12-14  wood / coal / uranium amount / 800
        15-16  own / opponent research points, capped at 200 and scaled to [0, 1]
        17     position in the 40-step day/night cycle
        18     step / 360
        19     1 inside the real map area

    Args:
        obs: Dict with 'width', 'height', 'step', 'player' and 'updates'.
        unit_id: Id of the unit the input is built for.

    Returns:
        The encoded ``np.ndarray``.
    """
    shift_x = (32 - obs['width']) // 2
    shift_y = (32 - obs['height']) // 2
    # city id -> min(fuel / light upkeep, 10) / 10, filled in by 'c' lines.
    fuel_ratio = {}

    planes = np.zeros((20, 32, 32), dtype=np.float32)

    for line in obs['updates']:
        tok = line.split(' ')
        kind = tok[0]

        if kind == 'u':
            gx = int(tok[4]) + shift_x
            gy = int(tok[5]) + shift_y
            cargo = int(tok[7]) + int(tok[8]) + int(tok[9])
            if unit_id == tok[3]:
                # The unit we are deciding for.
                planes[0, gx, gy] = 1
                planes[1, gx, gy] = cargo / 100
            else:
                # Every other unit, split by side relative to obs['player'].
                base = 2 + (int(tok[2]) - obs['player']) % 2 * 3
                planes[base:base + 3, gx, gy] = (1, float(tok[6]) / 6, cargo / 100)
        elif kind == 'ct':
            side = (int(tok[1]) - obs['player']) % 2
            city_id = tok[2]
            gx = int(tok[3]) + shift_x
            gy = int(tok[4]) + shift_y
            base = 8 + side * 2
            planes[base:base + 2, gx, gy] = (1, fuel_ratio[city_id])
        elif kind == 'r':
            resource = tok[1]
            gx = int(tok[2]) + shift_x
            gy = int(tok[3]) + shift_y
            amount = int(float(tok[4]))
            plane = {'wood': 12, 'coal': 13, 'uranium': 14}[resource]
            planes[plane, gx, gy] = amount / 800
        elif kind == 'rp':
            side = (int(tok[1]) - obs['player']) % 2
            points = int(tok[2])
            planes[15 + side, :] = min(points, 200) / 200
        elif kind == 'c':
            city_id = tok[2]
            fuel = float(tok[3])
            upkeep = float(tok[4])
            fuel_ratio[city_id] = min(fuel / upkeep, 10) / 10

    # Global planes: day/night phase, game progress, valid map area.
    planes[17, :] = obs['step'] % 40 / 40
    planes[18, :] = obs['step'] / 360
    planes[19, shift_x:32 - shift_x, shift_y:32 - shift_y] = 1

    return planes


In [None]:
class LuxDataset(Dataset):
    """Dataset of unit-action samples.

    Each sample is an ``(obs_id, unit_id, action)`` tuple; the network input is
    built on demand from the observation stored under ``obs_id``.
    """

    def __init__(self, obses, samples):
        # obses: obs_id -> observation dict; samples: list of sample tuples.
        self.obses, self.samples = obses, samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(state, action)`` for the sample at position ``idx``."""
        obs_id, unit_id, action = self.samples[idx]
        return make_input(self.obses[obs_id], unit_id), action


In [None]:
# Neural Network for Lux AI
class BasicConv2d(nn.Module):
    """A 'same'-padded 2-D convolution optionally followed by batch norm.

    Args:
        input_dim: Number of input channels.
        output_dim: Number of output channels.
        kernel_size: ``(height, width)`` of the kernel; padding is half of
            each side, so odd kernels keep the spatial size.
        bn: Whether to append ``nn.BatchNorm2d``.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        half_h, half_w = kernel_size[0] // 2, kernel_size[1] // 2
        self.conv = nn.Conv2d(
            input_dim,
            output_dim,
            kernel_size=kernel_size,
            padding=(half_h, half_w),
        )
        if bn:
            self.bn = nn.BatchNorm2d(output_dim)
        else:
            self.bn = None

    def forward(self, x):
        out = self.conv(x)
        if self.bn is None:
            return out
        return self.bn(out)


In [None]:
class LuxNet(nn.Module):
    """Residual CNN that scores the actions of a single unit.

    Args:
        num_actions: Width of the policy head. Defaults to 5 (four moves plus
            ``bcity``), the original hard-coded value. ``to_label`` can also
            emit label 5 for pillage; train with ``num_actions=6`` if such
            samples are present, otherwise the loss sees an out-of-range target.
    """

    def __init__(self, num_actions=5):
        super().__init__()
        layers, filters = 12, 32
        self.conv0 = BasicConv2d(20, filters, (3, 3), True)
        self.blocks = nn.ModuleList(
            [BasicConv2d(filters, filters, (3, 3), True) for _ in range(layers)])
        self.head_p = nn.Linear(filters, num_actions, bias=False)

    def forward(self, x):
        """Return action logits of shape ``(batch, num_actions)``."""
        h = F.relu_(self.conv0(x))
        for block in self.blocks:
            # Residual connection around every conv block.
            h = F.relu_(h + block(h))
        # Input plane 0 acts as a spatial mask: features are summed only over
        # the cells where it is non-zero (the acting unit's cell in make_input).
        h_head = (h * x[:, :1]).view(h.size(0), h.size(1), -1).sum(-1)
        p = self.head_p(h_head)
        return p


In [None]:
def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs, save_path='model.pth'):
    """Train ``model`` and save a TorchScript trace of the best epoch.

    Runs a 'train' and a 'val' phase per epoch, prints loss and accuracy for
    both, and whenever the validation accuracy improves traces the model on
    the CPU and writes it to ``save_path``.

    Args:
        model: Network taking a ``(batch, 20, 32, 32)`` float tensor.
        dataloaders_dict: ``{'train': DataLoader, 'val': DataLoader}`` yielding
            ``(states, actions)`` batches.
        criterion: Loss applied to ``(logits, actions)``.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs: Number of epochs to run.
        save_path: Destination of the traced model (default ``'model.pth'``).
    """
    # Use the GPU when there is one instead of failing on CPU-only runtimes.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    best_acc = 0.0
    for epoch in range(num_epochs):
        # Tracing below moves the model to the CPU, so move it back every epoch.
        model.to(device)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            epoch_loss = 0.0
            epoch_correct = 0

            dataloader = dataloaders_dict[phase]
            for item in tqdm(dataloader, leave=False):
                states = item[0].to(device).float()
                actions = item[1].to(device).long()

                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    policy = model(states)
                    loss = criterion(policy, actions)
                    _, preds = torch.max(policy, 1)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                    # The loss is a batch mean; weight it by batch size.
                    epoch_loss += loss.item() * len(policy)
                    epoch_correct += torch.sum(preds == actions).item()

            data_size = len(dataloader.dataset)
            epoch_loss = epoch_loss / data_size
            epoch_acc = epoch_correct / data_size

            print(f'Epoch {epoch + 1}/{num_epochs} | {phase:^5} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}')

        # epoch_acc now holds the validation accuracy (last phase of the epoch);
        # the model is still in eval mode, so batch norm is traced with running stats.
        if epoch_acc > best_acc:
            traced = torch.jit.trace(model.cpu(), torch.rand(1, 20, 32, 32))
            traced.save(save_path)
            best_acc = epoch_acc


In [None]:
# Unit-action model and data pipeline.
model = LuxNet()
# 90/10 train/validation split, stratified so both parts keep the label mix.
train, val = train_test_split(samples, test_size=0.1, random_state=42, stratify=labels)
batch_size = 64
# Only the training loader is shuffled.
train_loader = DataLoader(
    LuxDataset(obses, train), 
    batch_size=batch_size, 
    shuffle=True, 
    num_workers=2
)
val_loader = DataLoader(
    LuxDataset(obses, val), 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=2
)
# train_model() looks the loaders up by the phase names 'train' and 'val'.
dataloaders_dict = {"train": train_loader, "val": val_loader}
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)


In [None]:
# Train the unit-action network; train_model() saves the traced model whenever validation accuracy improves.
train_model(model, dataloaders_dict, criterion, optimizer, num_epochs=2)

  0%|          | 0/8473 [00:00<?, ?it/s]

Epoch 1/2 | train | Loss: 0.9444 | Acc: 0.6013


  0%|          | 0/942 [00:00<?, ?it/s]

Epoch 1/2 |  val  | Loss: 0.8360 | Acc: 0.6508


  0%|          | 0/8473 [00:00<?, ?it/s]

Epoch 2/2 | train | Loss: 0.8038 | Acc: 0.6671


  0%|          | 0/942 [00:00<?, ?it/s]

Epoch 2/2 |  val  | Loss: 0.7907 | Acc: 0.6693


In [None]:
# Copy the traced unit-action model written by train_model() to Google Drive.
# (Opening the destination in text mode and writing 'model.pth' only stored the
# file *name* as text, not the model.)
import shutil

shutil.copyfile('model.pth', './drive/Shareddrives/CSCI 527 Fall 2021/training_data/model.pth')

# Training City actions

In [None]:
# Input for Neural Network

def make_input_ct(obs, pos_x, pos_y):
    """Encode an observation as a (20, 32, 32) float32 tensor for a city-tile decision.

    The map is centred inside the 32x32 grid. Plane layout:
        0-2    own units: presence, cooldown / 6, cargo / 100
        3-5    opponent units (same three values)
        6-8    own city tiles: presence, city fuel ratio, cooldown / 6
        9-11   opponent city tiles
        12-14  wood / coal / uranium amount / 800
        15-16  own / opponent research points, capped at 200 and scaled to [0, 1]
        17     position in the 40-step day/night cycle
        18     step / 360
        19     1 inside the real map area

    NOTE(review): ``pos_x`` and ``pos_y`` are accepted but never read, so every
    city tile of the same observation receives an identical input. Confirm
    whether the acting tile was meant to be marked in its own plane.

    Args:
        obs: Dict with 'width', 'height', 'step', 'player' and 'updates'.
        pos_x: X coordinate of the acting city tile (currently unused).
        pos_y: Y coordinate of the acting city tile (currently unused).

    Returns:
        The encoded ``np.ndarray``.
    """
    shift_x = (32 - obs['width']) // 2
    shift_y = (32 - obs['height']) // 2
    # city id -> min(fuel / light upkeep, 10) / 10, filled in by 'c' lines.
    fuel_ratio = {}

    planes = np.zeros((20, 32, 32), dtype=np.float32)

    for line in obs['updates']:
        tok = line.split(' ')
        kind = tok[0]

        if kind == 'u':
            gx = int(tok[4]) + shift_x
            gy = int(tok[5]) + shift_y
            cargo = int(tok[7]) + int(tok[8]) + int(tok[9])
            side = (int(tok[2]) - obs['player']) % 2
            # Dividing the cooldown by 6 is presumably a normalisation
            # constant -- TODO confirm against the game's cooldown range.
            cooldown = float(tok[6])
            base = 0 + side * 3
            planes[base:base + 3, gx, gy] = (1, cooldown / 6, cargo / 100)
        elif kind == 'ct':
            side = (int(tok[1]) - obs['player']) % 2
            city_id = tok[2]
            gx = int(tok[3]) + shift_x
            gy = int(tok[4]) + shift_y
            cooldown = float(tok[5])
            base = 6 + side * 3
            planes[base:base + 3, gx, gy] = (1, fuel_ratio[city_id], cooldown / 6)
        elif kind == 'r':
            resource = tok[1]
            gx = int(tok[2]) + shift_x
            gy = int(tok[3]) + shift_y
            amount = int(float(tok[4]))
            plane = {'wood': 12, 'coal': 13, 'uranium': 14}[resource]
            planes[plane, gx, gy] = amount / 800
        elif kind == 'rp':
            side = (int(tok[1]) - obs['player']) % 2
            points = int(tok[2])
            planes[15 + side, :] = min(points, 200) / 200
        elif kind == 'c':
            city_id = tok[2]
            fuel = float(tok[3])
            upkeep = float(tok[4])
            fuel_ratio[city_id] = min(fuel / upkeep, 10) / 10

    # Global planes: day/night phase, game progress, valid map area.
    planes[17, :] = obs['step'] % 40 / 40
    planes[18, :] = obs['step'] / 360
    planes[19, shift_x:32 - shift_x, shift_y:32 - shift_y] = 1

    return planes


In [None]:
class LuxDataset(Dataset):
    """Dataset of city-tile action samples.

    Each sample is an ``(obs_id, pos_x, pos_y, action)`` tuple. This cell
    re-declares the name ``LuxDataset``, so the unit-action dataset class
    defined earlier in the notebook is shadowed from here on.
    """

    def __init__(self, obses, samples):
        # obses: obs_id -> observation dict; samples: list of sample tuples.
        self.obses, self.samples = obses, samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(state, action)`` for the sample at position ``idx``."""
        obs_id, pos_x, pos_y, action = self.samples[idx]
        return make_input_ct(self.obses[obs_id], pos_x=pos_x, pos_y=pos_y), action


In [None]:
# Neural Network for Lux AI
class BasicConv2d(nn.Module):
    """A 'same'-padded 2-D convolution optionally followed by batch norm.

    Identical to the ``BasicConv2d`` declared in the unit-action section of
    this notebook; this cell re-declares it.

    Args:
        input_dim: Number of input channels.
        output_dim: Number of output channels.
        kernel_size: ``(height, width)`` of the kernel; padding is half of
            each side, so odd kernels keep the spatial size.
        bn: Whether to append ``nn.BatchNorm2d``.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        half_h, half_w = kernel_size[0] // 2, kernel_size[1] // 2
        self.conv = nn.Conv2d(
            input_dim,
            output_dim,
            kernel_size=kernel_size,
            padding=(half_h, half_w),
        )
        if bn:
            self.bn = nn.BatchNorm2d(output_dim)
        else:
            self.bn = None

    def forward(self, x):
        out = self.conv(x)
        if self.bn is None:
            return out
        return self.bn(out)


In [None]:
class LuxNet(nn.Module):
    """Residual CNN that scores the actions of a city tile.

    This cell re-declares ``LuxNet``; it differs from the unit-action network
    defined earlier in the notebook only in the width of the policy head.

    Args:
        num_actions: Width of the policy head. Defaults to 3 (research,
            build worker, build cart), the original hard-coded value.
    """

    def __init__(self, num_actions=3):
        super().__init__()
        layers, filters = 12, 32
        self.conv0 = BasicConv2d(20, filters, (3, 3), True)
        self.blocks = nn.ModuleList(
            [BasicConv2d(filters, filters, (3, 3), True) for _ in range(layers)])
        self.head_p = nn.Linear(filters, num_actions, bias=False)

    def forward(self, x):
        """Return action logits of shape ``(batch, num_actions)``."""
        h = F.relu_(self.conv0(x))
        for block in self.blocks:
            # Residual connection around every conv block.
            h = F.relu_(h + block(h))
        # Input plane 0 acts as a spatial mask for the pooled features.
        # NOTE(review): with make_input_ct, plane 0 marks own-team units rather
        # than the acting city tile -- confirm this pooling is intended.
        h_head = (h * x[:, :1]).view(h.size(0), h.size(1), -1).sum(-1)
        p = self.head_p(h_head)
        return p


In [None]:
def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs, save_path='model_ct.pth'):
    """Train ``model`` and save a TorchScript trace of the best epoch.

    Runs a 'train' and a 'val' phase per epoch, prints loss and accuracy for
    both, and whenever the validation accuracy improves traces the model on
    the CPU and writes it to ``save_path``.

    Args:
        model: Network taking a ``(batch, 20, 32, 32)`` float tensor.
        dataloaders_dict: ``{'train': DataLoader, 'val': DataLoader}`` yielding
            ``(states, actions)`` batches.
        criterion: Loss applied to ``(logits, actions)``.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs: Number of epochs to run.
        save_path: Destination of the traced model (default ``'model_ct.pth'``).
    """
    # Use the GPU when there is one instead of failing on CPU-only runtimes.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    best_acc = 0.0
    for epoch in range(num_epochs):
        # Tracing below moves the model to the CPU, so move it back every epoch.
        model.to(device)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            epoch_loss = 0.0
            epoch_correct = 0

            dataloader = dataloaders_dict[phase]
            for item in tqdm(dataloader, leave=False):
                states = item[0].to(device).float()
                actions = item[1].to(device).long()

                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    policy = model(states)
                    loss = criterion(policy, actions)
                    _, preds = torch.max(policy, 1)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                    # The loss is a batch mean; weight it by batch size.
                    epoch_loss += loss.item() * len(policy)
                    epoch_correct += torch.sum(preds == actions).item()

            data_size = len(dataloader.dataset)
            epoch_loss = epoch_loss / data_size
            epoch_acc = epoch_correct / data_size

            print(f'Epoch {epoch + 1}/{num_epochs} | {phase:^5} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}')

        # epoch_acc now holds the validation accuracy (last phase of the epoch);
        # the model is still in eval mode, so batch norm is traced with running stats.
        if epoch_acc > best_acc:
            traced = torch.jit.trace(model.cpu(), torch.rand(1, 20, 32, 32))
            traced.save(save_path)
            best_acc = epoch_acc


In [None]:
# City-tile model and data pipeline (reuses the names of the unit-action section).
model = LuxNet()
# 90/10 train/validation split, stratified so both parts keep the label mix.
train, val = train_test_split(samples_ct, test_size=0.1, random_state=42, stratify=labels_ct)
batch_size = 64
# Only the training loader is shuffled.
train_loader = DataLoader(
    LuxDataset(obses_ct, train), 
    batch_size=batch_size, 
    shuffle=True, 
    num_workers=2
)
val_loader = DataLoader(
    LuxDataset(obses_ct, val), 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=2
)
# train_model() looks the loaders up by the phase names 'train' and 'val'.
dataloaders_dict = {"train": train_loader, "val": val_loader}
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)


In [None]:
# Train the city-tile network; train_model() saves the traced model whenever validation accuracy improves.
train_model(model, dataloaders_dict, criterion, optimizer, num_epochs=10)

  0%|          | 0/1355 [00:00<?, ?it/s]

Epoch 1/10 | train | Loss: 0.8039 | Acc: 0.7736


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 1/10 |  val  | Loss: 0.4283 | Acc: 0.8133


  0%|          | 0/1355 [00:00<?, ?it/s]

Epoch 2/10 | train | Loss: 0.4252 | Acc: 0.8171


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 2/10 |  val  | Loss: 0.3979 | Acc: 0.8254


  0%|          | 0/1355 [00:00<?, ?it/s]

Epoch 3/10 | train | Loss: 0.4071 | Acc: 0.8266


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 3/10 |  val  | Loss: 0.3989 | Acc: 0.8252


  0%|          | 0/1355 [00:00<?, ?it/s]

Epoch 4/10 | train | Loss: 0.3936 | Acc: 0.8325


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 4/10 |  val  | Loss: 0.4070 | Acc: 0.8271


  0%|          | 0/1355 [00:00<?, ?it/s]

Epoch 5/10 | train | Loss: 0.3858 | Acc: 0.8351


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 5/10 |  val  | Loss: 0.3702 | Acc: 0.8384


  0%|          | 0/1355 [00:00<?, ?it/s]

Epoch 6/10 | train | Loss: 0.3759 | Acc: 0.8400


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 6/10 |  val  | Loss: 0.3721 | Acc: 0.8404


  0%|          | 0/1355 [00:00<?, ?it/s]

Epoch 7/10 | train | Loss: 0.3688 | Acc: 0.8432


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 7/10 |  val  | Loss: 0.3751 | Acc: 0.8379


  0%|          | 0/1355 [00:00<?, ?it/s]

Epoch 8/10 | train | Loss: 0.3595 | Acc: 0.8474


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 8/10 |  val  | Loss: 0.3786 | Acc: 0.8391


  0%|          | 0/1355 [00:00<?, ?it/s]

Epoch 9/10 | train | Loss: 0.3512 | Acc: 0.8500


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 9/10 |  val  | Loss: 0.3623 | Acc: 0.8437


  0%|          | 0/1355 [00:00<?, ?it/s]

Epoch 10/10 | train | Loss: 0.3452 | Acc: 0.8514


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 10/10 |  val  | Loss: 0.3734 | Acc: 0.8413


In [None]:
# Copy the traced city-tile model written by train_model() to Google Drive.
# (Opening the destination in text mode and writing 'model_ct.pth' only stored
# the file *name* as text, not the model.)
import shutil

shutil.copyfile('model_ct.pth', './drive/Shareddrives/CSCI 527 Fall 2021/training_data/model_ct.pth')

# Submission

In [None]:
%%writefile agent.py
import os
import numpy as np
import torch
from lux.game import Game


# Use the Kaggle simulation directory when it exists, otherwise the working directory.
path = '/kaggle_simulations/agent' if os.path.exists('/kaggle_simulations') else '.'
# TorchScript model that scores unit actions.
model = torch.jit.load(f'{path}/model.pth')
model.eval()

# TorchScript model that scores city-tile actions.
model_ct = torch.jit.load(f'{path}/model_ct.pth')
model_ct.eval()

# Input for Neural Network
def make_input_ct(obs, pos_x, pos_y):
    """Encode an observation as a (20, 32, 32) float32 tensor for a city-tile decision.

    The layout must be exactly the one model_ct was trained and traced with
    (20 planes); feeding any other number of planes makes the first
    convolution of the traced model fail. Plane layout:
        0-2    own units: presence, cooldown / 6, cargo / 100
        3-5    opponent units (same three values)
        6-8    own city tiles: presence, city fuel ratio, cooldown / 6
        9-11   opponent city tiles
        12-14  wood / coal / uranium amount / 800
        15-16  own / opponent research points, capped at 200 and scaled to [0, 1]
        17     position in the 40-step day/night cycle
        18     step / 360
        19     1 inside the real map area

    NOTE(review): pos_x / pos_y are kept for interface compatibility but are
    not encoded, matching the training-time features. Marking the acting tile
    would require retraining model_ct with the new layout.

    Args:
        obs: Observation with 'width', 'height', 'step', 'player', 'updates'.
        pos_x: X coordinate of the acting city tile (unused).
        pos_y: Y coordinate of the acting city tile (unused).

    Returns:
        The encoded np.ndarray.
    """
    width, height = obs['width'], obs['height']
    x_shift = (32 - width) // 2
    y_shift = (32 - height) // 2
    # city id -> min(fuel / light upkeep, 10) / 10, filled in by 'c' lines.
    cities = {}

    b = np.zeros((20, 32, 32), dtype=np.float32)

    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]

        if input_identifier == 'u':
            # Units
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift
            wood = int(strs[7])
            coal = int(strs[8])
            uranium = int(strs[9])
            team = int(strs[2])
            cooldown = float(strs[6])
            idx = 0 + (team - obs['player']) % 2 * 3
            b[idx:idx + 3, x, y] = (
                1,
                cooldown / 6,
                (wood + coal + uranium) / 100
            )
        elif input_identifier == 'ct':
            # CityTiles
            team = int(strs[1])
            city_id = strs[2]
            x = int(strs[3]) + x_shift
            y = int(strs[4]) + y_shift
            cooldown = float(strs[5])
            idx = 6 + (team - obs['player']) % 2 * 3
            b[idx:idx + 3, x, y] = (
                1,
                cities[city_id],
                cooldown / 6
            )
        elif input_identifier == 'r':
            # Resources
            r_type = strs[1]
            x = int(strs[2]) + x_shift
            y = int(strs[3]) + y_shift
            amt = int(float(strs[4]))
            b[{'wood': 12, 'coal': 13, 'uranium': 14}[r_type], x, y] = amt / 800
        elif input_identifier == 'rp':
            # Research Points
            team = int(strs[1])
            rp = int(strs[2])
            b[15 + (team - obs['player']) % 2, :] = min(rp, 200) / 200
        elif input_identifier == 'c':
            # Cities
            city_id = strs[2]
            fuel = float(strs[3])
            lightupkeep = float(strs[4])
            cities[city_id] = min(fuel / lightupkeep, 10) / 10

    # Day/Night Cycle
    b[17, :] = obs['step'] % 40 / 40
    # Turns
    b[18, :] = obs['step'] / 360
    # Map Size
    b[19, x_shift:32 - x_shift, y_shift:32 - y_shift] = 1

    return b



def make_input(obs, unit_id):
    """Encode an observation as a (20, 32, 32) float32 tensor for one unit.

    The map is centred inside the 32x32 grid. Plane layout:
        0-1    the unit unit_id: presence, cargo / 100
        2-4    other own-team units: presence, cooldown / 6, cargo / 100
        5-7    opponent units (same three values)
        8-9    own city tiles: presence, city fuel ratio
        10-11  opponent city tiles
        12-14  wood / coal / uranium amount / 800
        15-16  own / opponent research points, capped at 200 and scaled to [0, 1]
        17     position in the 40-step day/night cycle
        18     step / 360
        19     1 inside the real map area

    Args:
        obs: Observation with 'width', 'height', 'step', 'player', 'updates'.
        unit_id: Id of the unit the input is built for.

    Returns:
        The encoded np.ndarray.
    """
    shift_x = (32 - obs['width']) // 2
    shift_y = (32 - obs['height']) // 2
    # city id -> min(fuel / light upkeep, 10) / 10, filled in by 'c' lines.
    fuel_ratio = {}

    planes = np.zeros((20, 32, 32), dtype=np.float32)

    for line in obs['updates']:
        tok = line.split(' ')
        kind = tok[0]

        if kind == 'u':
            gx = int(tok[4]) + shift_x
            gy = int(tok[5]) + shift_y
            cargo = int(tok[7]) + int(tok[8]) + int(tok[9])
            if unit_id == tok[3]:
                # The unit we are deciding for.
                planes[0, gx, gy] = 1
                planes[1, gx, gy] = cargo / 100
            else:
                # Every other unit, split by side relative to obs['player'].
                base = 2 + (int(tok[2]) - obs['player']) % 2 * 3
                planes[base:base + 3, gx, gy] = (1, float(tok[6]) / 6, cargo / 100)
        elif kind == 'ct':
            side = (int(tok[1]) - obs['player']) % 2
            city_id = tok[2]
            gx = int(tok[3]) + shift_x
            gy = int(tok[4]) + shift_y
            base = 8 + side * 2
            planes[base:base + 2, gx, gy] = (1, fuel_ratio[city_id])
        elif kind == 'r':
            resource = tok[1]
            gx = int(tok[2]) + shift_x
            gy = int(tok[3]) + shift_y
            amount = int(float(tok[4]))
            plane = {'wood': 12, 'coal': 13, 'uranium': 14}[resource]
            planes[plane, gx, gy] = amount / 800
        elif kind == 'rp':
            side = (int(tok[1]) - obs['player']) % 2
            points = int(tok[2])
            planes[15 + side, :] = min(points, 200) / 200
        elif kind == 'c':
            city_id = tok[2]
            fuel = float(tok[3])
            upkeep = float(tok[4])
            fuel_ratio[city_id] = min(fuel / upkeep, 10) / 10

    # Global planes: day/night phase, game progress, valid map area.
    planes[17, :] = obs['step'] % 40 / 40
    planes[18, :] = obs['step'] / 360
    planes[19, shift_x:32 - shift_x, shift_y:32 - shift_y] = 1

    return planes


game_state = None
def get_game_state(observation):
    """Create the module-level Game on step 0, otherwise feed it this turn's updates.

    On step 0 a new Game is initialised from the full update list, then
    updated with everything after the first two entries and tagged with the
    player id. On later steps the existing Game is updated in place.

    Returns:
        The module-level game_state.
    """
    global game_state

    updates = observation["updates"]
    if observation["step"] != 0:
        game_state._update(updates)
        return game_state

    game_state = Game()
    game_state._initialize(updates)
    game_state._update(updates[2:])
    game_state.id = observation["player"]
    return game_state


def in_city(pos):
    """Return True if the cell at pos holds one of our own city tiles.

    Any failure while looking the cell up (for example an uninitialised game
    state) is treated as "not in a city" and yields False.
    """
    try:
        city = game_state.map.get_cell_by_pos(pos).citytile
        return city is not None and city.team == game_state.id
    except Exception:
        # Best effort by design, but do not swallow KeyboardInterrupt/SystemExit
        # the way a bare `except:` does.
        return False


def call_func(obj, method, args=()):
    """Call obj.<method>(*args) and return its result.

    Args:
        obj: Object owning the method.
        method: Name of the method to look up with getattr.
        args: Positional arguments for the call; defaults to an immutable
            empty tuple instead of a shared mutable list.
    """
    return getattr(obj, method)(*args)


# Method name plus arguments for every policy index, in label order.
unit_actions = [('move', 'n'), ('move', 's'), ('move', 'w'), ('move', 'e'), ('build_city',), ('pillage',)]
def get_action(policy, unit, dest):
    """Pick the best-scored action whose target cell is not already claimed.

    Actions are tried from highest to lowest score. An action is accepted when
    its target cell is not in dest, or when that cell is one of our city
    tiles. If nothing is accepted the unit issues a centre move.

    Args:
        policy: 1-D array of action scores.
        unit: Unit to act.
        dest: Cells already claimed by other units this turn.

    Returns:
        (action, target_cell).
    """
    ranked_labels = np.argsort(policy)[::-1]
    for label in ranked_labels:
        act = unit_actions[label]
        # The last element of the action tuple is passed as the direction; when
        # translate() returns a falsy value the unit's own cell is used.
        target = unit.pos.translate(act[-1], 1) or unit.pos
        if target in dest and not in_city(target):
            continue
        return call_func(unit, *act), target

    return unit.move('c'), unit.pos


def agent(observation, configuration):
    """Return this turn's actions for every city tile and unit that can act.

    City tiles are driven by model_ct (0 = research, 1 = build worker,
    2 = build cart); units are driven by model through get_action().

    Args:
        observation: Kaggle observation for the current step.
        configuration: Environment configuration (unused).

    Returns:
        List of action strings.
    """
    global game_state

    game_state = get_game_state(observation)
    player = game_state.players[observation.player]
    actions = []

    # City Actions
    for city in player.cities.values():
        for city_tile in city.citytiles:
            if not city_tile.can_act():
                continue

            state = make_input_ct(observation, city_tile.pos.x, city_tile.pos.y)
            with torch.no_grad():
                p = model_ct(torch.from_numpy(state).unsqueeze(0))

            policy = p.squeeze(0).numpy()
            label = np.argmax(policy)
            if label == 0:
                actions.append(city_tile.research())
            elif label == 1:
                actions.append(city_tile.build_worker())
            elif label == 2:
                actions.append(city_tile.build_cart())
            else:
                # Only reachable if model_ct has more than three outputs.
                print(p)
                print(policy)
                print(label)
                print("ERROR: Invalid city action")

    # Worker Actions
    dest = []
    for unit in player.units:
        # Units standing in one of our cities stay put for the last 10 steps
        # of each 40-step cycle.
        if unit.can_act() and (game_state.turn % 40 < 30 or not in_city(unit.pos)):
            state = make_input(observation, unit.id)
            with torch.no_grad():
                p = model(torch.from_numpy(state).unsqueeze(0))

            policy = p.squeeze(0).numpy()

            action, pos = get_action(policy, unit, dest)
            actions.append(action)
            dest.append(pos)

    return actions

Overwriting agent.py


In [None]:
from kaggle_environments import make

# Play the freshly written agent against itself on a 24x24 map and render the replay inline.
env = make("lux_ai_2021", configuration={"width": 24, "height": 24, "loglevel": 2, "annotations": True}, debug=False)
steps = env.run(['agent.py', 'agent.py'])
env.render(mode="ipython", width=1200, height=800)