In [None]:
!pip install kaggle-environments -U

In [None]:
from kaggle_environments import make
import json

In [None]:
from lux.game import Game
from lux.game_map import Cell, RESOURCE_TYPES, Position
from lux.game_objects import Unit
from lux.constants import Constants
from lux.game_constants import GAME_CONSTANTS
from lux import annotate


In [None]:
#%%writefile agent.py
# housekeeping
import math, sys


# for kaggle-environments
from lux.game import Game
from lux.game_map import Cell, RESOURCE_TYPES, Position
from lux.game_objects import Unit
from lux.constants import Constants
from lux.game_constants import GAME_CONSTANTS
from lux import annotate

DIRECTIONS = Constants.DIRECTIONS
game_state = None

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count
from PIL import Image

# set up matplotlib

# Compute device shared by the networks and tensors below: CUDA when PyTorch
# reports it as available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# One replay-buffer record: the observed state, the action taken in it, the
# state that followed (None marks "no next state") and the reward received.
Transition = namedtuple(
    'Transition',
    field_names=['state', 'action', 'next_state', 'reward'],
)


class ReplayMemory(object):
    """Fixed-capacity buffer of `Transition` records for experience replay.

    Records are kept in a `deque` created with `maxlen=capacity`, so once the
    buffer is full every new record evicts the oldest one.
    """

    def __init__(self, capacity):
        """Create an empty buffer that holds at most `capacity` records."""
        self.memory = deque(maxlen=capacity)

    def push(self, *args):
        """Wrap the positional fields in a `Transition` and store it."""
        record = Transition(*args)
        self.memory.append(record)

    def sample(self, batch_size):
        """Return a list of `batch_size` stored records drawn without replacement."""
        drawn = random.sample(self.memory, batch_size)
        return drawn

    def __len__(self):
        """Number of records currently stored."""
        stored = self.memory
        return len(stored)
    
class DQN(nn.Module):
    """Convolutional Q-network: three stride-2 conv/batch-norm stages and a linear head.

    Args:
        h: Height of the input feature map.
        w: Width of the input feature map.
        outputs: Number of Q-values (one per action) produced by the head.
        in_channels: Number of input feature planes. Defaults to 10, the value
            that was previously hard-coded in the first convolution.

    Raises:
        ValueError: If `h` or `w` is too small to survive the three
            kernel-4 / stride-2 convolutions.
    """

    def __init__(self, h, w, outputs, in_channels=10):
        super().__init__()
        # Layer names and creation order are kept as-is so existing
        # state_dict checkpoints still load.
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=4, stride=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=4, stride=2)
        self.bn3 = nn.BatchNorm2d(64)

        # The number of inputs to the linear head depends on the spatial size
        # left after the three un-padded convolutions, so compute it.
        def conv2d_size_out(size, kernel_size=4, stride=2):
            return (size - (kernel_size - 1) - 1) // stride + 1

        convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
        convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
        if convw < 1 or convh < 1:
            raise ValueError(
                "input of size %sx%s is too small for three kernel-4/stride-2 convolutions"
                % (h, w)
            )
        linear_input_size = convw * convh * 64
        self.head = nn.Linear(linear_input_size, outputs)

    def forward(self, x):
        """Return Q-values of shape (batch, outputs) for a batch of inputs.

        Called with a single-element batch when choosing an action and with a
        full batch during optimisation.
        """
        # Move the input to wherever this module's weights live instead of
        # relying on a module-level `device` global that may not match.
        x = x.to(self.head.weight.device)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        return self.head(x.flatten(start_dim=1))
    
def get_screen(game_state, unit=0, size=32):
    """Encode the current game state as a (1, 10, size, size) float tensor.

    Feature planes (channel index -> content), indexed as [0, channel, y, x]:
        0    resource amount on the cell (0 where there is no resource)
        1-5  unit type, cooldown, wood, coal, uranium for units of players[0]
        6-9  city-tile cooldown, city fuel, city light upkeep, city team for
             cities of players[1]

    Args:
        game_state: Object exposing `map.width`, `map.height`, `map.map[y][x]`
            and `players`, as read below.
        unit: Optional unit to highlight. When something other than an int is
            passed, its cell is rewritten with `type + 100` so it can be told
            apart from the other units. The default `0` means "no highlight".
        size: Side length of the square output. Maps smaller than `size` are
            zero-padded on the bottom/right edges.

    Returns:
        torch.FloatTensor of shape (1, 10, size, size).

    Raises:
        ValueError: If the map is larger than `size` in either dimension.

    Note:
        Planes are built channels-last and then transposed to channels-first.
        The previous implementation used a plain reshape for that conversion,
        which interleaved channels and positions; weights trained against that
        layout will need retraining or fine-tuning.
    """
    width, height = game_state.map.width, game_state.map.height
    if width > size or height > size:
        raise ValueError(
            "map of size %sx%s does not fit a %sx%s screen" % (width, height, size, size)
        )

    # Channels-last scratch buffer indexed [y, x, channel]; cells outside the
    # map stay at zero.
    planes = np.zeros((size, size, 10), dtype=np.float32)

    # Plane 0: resource amounts.
    for y in range(height):
        row = game_state.map.map[y]
        for x in range(width):
            resource = row[x].resource
            if resource is not None:
                planes[y, x, 0] = resource.amount

    def unit_features(u, type_offset=0):
        return [u.type + type_offset, u.cooldown, u.cargo.wood, u.cargo.coal, u.cargo.uranium]

    # Planes 1-5: units. NOTE(review): player index 0 is hard-coded here;
    # presumably the agent is always seated as player 0 - confirm.
    for u in game_state.players[0].units:
        planes[u.pos.y, u.pos.x, 1:6] = unit_features(u)

    # Highlight the acting unit, if one was supplied (the int default is a sentinel).
    if unit is not None and not isinstance(unit, int):
        planes[unit.pos.y, unit.pos.x, 1:6] = unit_features(unit, 100)

    # Planes 6-9: city tiles of player index 1 (also hard-coded, see note above).
    for city in game_state.players[1].cities.values():
        for tile in city.citytiles:
            planes[tile.pos.y, tile.pos.x, 6:10] = [
                tile.cooldown, city.fuel, city.light_upkeep, city.team,
            ]

    # (H, W, C) -> (1, C, H, W). A transpose is required here; reshaping would
    # only reinterpret the memory and mix channels with positions.
    return torch.from_numpy(planes).permute(2, 0, 1).unsqueeze(0).contiguous()
    
    
def get_prediction_actions(y,i):
    """Translate a predicted action index into a command for one unit.

    Args:
        y: Action index. Values up to 4 select a move in the direction
            "csnwe"[y]; 5 requests building a city; 6 requests a pillage.
        i: The unit to command; must offer `can_act`, `move`, `can_build`,
            `build_city` and `pillage`.

    Returns:
        A pair `(command, 1)`. `command` is whatever the unit method returned,
        or an empty list when the index maps to nothing the unit can do.
    """
    if y <= 4:
        # Resolve the direction before asking whether the unit may act.
        heading = "csnwe"[y]
        return (i.move(heading) if i.can_act() else []), 1
    # Building reads the module-level game_state for the map.
    if y == 5 and i.can_build(game_state.map):
        return i.build_city(), 1
    if y == 6:
        return i.pillage(), 1
    return [], 1
    
# --- DQN hyper-parameters ---
BATCH_SIZE = 128      # transitions sampled per optimisation step
GAMMA = 0.8           # discount applied to the next-state value
EPS_START = 0.9       # initial exploration rate
EPS_END = 0.05        # exploration rate the schedule decays towards
EPS_DECAY = 200       # decay constant of the exponential epsilon schedule (in action selections)
TARGET_UPDATE = 10

# Spatial size of the network input.
screen_height, screen_width = 32, 32

# Design notes:
# - one main processor
# - processors that control individual workers using encoding + state
#   would need to be NSEWC, M P B
# - alternatively, simply add 100 to the tile of the worker being controlled

# Size of the discrete action space: indices 0-4 are the five movement
# directions, 5 is "build city", 6 is "pillage".
n_actions = 7

policy_net = DQN(screen_height, screen_width, n_actions).to(device)
# map_location keeps the load working when the checkpoint was saved from a
# different device than the one in use now (e.g. saved on GPU, resumed on CPU).
policy_net.load_state_dict(torch.load("../input/800env/policy", map_location=device))

# The target network starts as a frozen copy of the policy network.
target_net = DQN(screen_height, screen_width, n_actions).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.RMSprop(policy_net.parameters())
memory = ReplayMemory(10000)

# Mutable training state shared with agent() / select_action() through globals.
total_reward = 0   # reward accumulated by agent()
first = 0          # set to 1 by agent() once it has been called
steps_done = 0     # number of select_action() calls; drives the epsilon decay
last = 0           # turn number recorded on agent()'s first call


def select_action(state):
    """Pick an action index for `state` with an epsilon-greedy policy.

    The exploration rate decays exponentially from EPS_START towards EPS_END
    as the global `steps_done` counter grows; the counter is incremented on
    every call.

    Returns:
        A (1, 1) long tensor holding the chosen action index.
    """
    global steps_done
    draw = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1

    # Explore: uniformly random action.
    if draw <= eps_threshold:
        random_index = random.randrange(n_actions)
        return torch.tensor([[random_index]], device=device, dtype=torch.long)

    # Exploit: index of the largest Q-value predicted by the policy network.
    with torch.no_grad():
        q_values = policy_net(state)
        best_index = q_values.max(1)[1]
        return best_index.view(1, 1)

def optimize_model():
    """Run one optimisation step of the policy network on a replayed batch.

    Does nothing until the replay memory holds at least BATCH_SIZE
    transitions. Otherwise samples a batch, regresses Q(s, a) from
    `policy_net` towards `reward + GAMMA * max_a' Q_target(s', a')` (with the
    bootstrap term fixed to 0 where `next_state` is None) using the Huber
    loss, clamps every gradient element to [-1, 1] and steps the optimizer.
    """
    if len(memory) < BATCH_SIZE:
        return

    # Turn a list of Transition records into one Transition of tuples.
    sampled = memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*sampled))

    # True for the entries that have a successor state.
    has_successor = tuple(nxt is not None for nxt in batch.next_state)
    non_final_mask = torch.tensor(has_successor, device=device, dtype=torch.bool)
    successors = torch.cat([nxt for nxt in batch.next_state if nxt is not None])

    states = torch.cat(batch.state)
    taken_actions = torch.cat(batch.action)
    rewards = torch.cat(batch.reward)

    # Q(s_t, a_t): evaluate all actions, then pick the column of the action
    # that was actually taken.
    q_taken = policy_net(states).gather(1, taken_actions)

    # V(s_{t+1}) from the target network; stays 0 where there is no successor.
    successor_values = torch.zeros(BATCH_SIZE, device=device)
    successor_values[non_final_mask] = target_net(successors).max(1)[0].detach()

    q_target = (successor_values * GAMMA) + rewards

    # Huber loss between predicted and bootstrapped values.
    loss = F.smooth_l1_loss(q_taken, q_target.unsqueeze(1))

    optimizer.zero_grad()
    loss.backward()
    # Element-wise gradient clamping.
    for weight in policy_net.parameters():
        weight.grad.data.clamp_(-1, 1)
    optimizer.step()

def agent(observation, configuration):
    """Entry point called once per turn: update state, train online, return actions.

    On each call this function
      1. creates (step 0) or updates the global `game_state` from
         `observation["updates"]`;
      2. on every turn after the first recorded one, stores the most recent
         `(state, action, next_state, reward)` transition in the replay
         memory and runs one `optimize_model()` step;
      3. lets the policy network choose an action for every worker that can
         act, and lets every city tile that can act build a worker or research.

    Args:
        observation: Per-turn observation; read via `observation["step"]`,
            `observation["updates"]`, `observation["reward"]` and
            `observation.player`.
        configuration: Environment configuration (not used here).

    Returns:
        List of the action commands issued this turn.

    Side effects:
        Mutates the module-level `game_state`, `state`, `action`, `reward`,
        `done`, `first`, `last`, `total_reward` and the replay `memory`.
    """
    global game_state
    global done 
    global memory
    global action
    global state
    global reward
    global first
    global total_reward
    global last

    ### Do not edit ###
    if observation["step"] == 0:
        game_state = Game()
        game_state._initialize(observation["updates"])
        game_state._update(observation["updates"][2:])
        game_state.id = observation.player
    else:
        game_state._update(observation["updates"])
    
    actions = []

    ### AI Code goes down here! ### 
    player = game_state.players[observation.player]
    width, height = game_state.map.width, game_state.map.height

    ##############################
    ### NOVEL CODE STARTS HERE ###
    ##############################

    # helper functions
    def researched(resource):
        """
        given a Resource object, return whether the player has researched the resource type
        """
        if resource.type == Constants.RESOURCE_TYPES.WOOD:
            return True
        if resource.type == Constants.RESOURCE_TYPES.COAL \
            and player.research_points >= GAME_CONSTANTS['PARAMETERS']['RESEARCH_REQUIREMENTS']['COAL']:
                return True
        if resource.type == Constants.RESOURCE_TYPES.URANIUM \
            and player.research_points >= GAME_CONSTANTS['PARAMETERS']['RESEARCH_REQUIREMENTS']['URANIUM']:
                return True
        return False

    def get_cells(cell_type):  # resource, researched resource, player citytile, enemy citytile, empty
        """
        Given a cell type, returns a list of Cell objects of the given type
        Options are: ['resource', 'researched resource', 'player citytile', 'enemy citytile', 'empty']
        """
        cells_of_type = []
        for y in range(height):
            for x in range(width):
                cell = game_state.map.get_cell(x, y)
                if (
                       ( cell_type == 'resource' and cell.has_resource() ) \
                    or ( cell_type == 'researched resource' and cell.has_resource() and researched(cell.resource) ) \
                    or ( cell_type == 'player citytile' and cell.citytile is not None and cell.citytile.team == observation.player ) \
                    or ( cell_type == 'enemy citytile' and cell.citytile is not None and cell.citytile.team != observation.player ) \
                    or ( cell_type == 'empty' and cell.citytile is None and not cell.has_resource() )
                ):
                    cells_of_type.append(cell)
        
        return cells_of_type

    #############################
    ### ALGORITHM STARTS HERE ###
    #############################

    # Number of city tiles owned by this player; bounds how many units may exist.
    num_citytiles = len(get_cells('player citytile'))

    # NOTE(review): the last turn on which the agent is called is assumed to be
    # MAX_DAYS - 1 (turns counted from 0) - confirm against the game rules.
    final_turn = GAME_CONSTANTS['PARAMETERS'].get('MAX_DAYS', 360) - 1

    if first == 0:
        # Very first call: remember the starting turn; there is no previous
        # transition to store yet.
        last = game_state.turn
        first = 1

    elif last < game_state.turn:
        # Previously this condition was duplicated in a second `elif`, so the
        # terminal branch could never run and `done` was always False. The
        # two branches are merged and `done` now depends on the turn number.
        done = game_state.turn >= final_turn
        next_state = None if done else get_screen(game_state)
        # Store the transition in memory
        memory.push(state, action, next_state, reward)
        # Perform one step of the optimization (on the policy network)
        optimize_model()

    # iterate over units
    # The encoded screen does not depend on the unit, so it is computed at
    # most once per turn and shared by every worker that acts.
    # NOTE(review): all workers therefore see the same input; only the last
    # worker's (state, action, reward) is what gets stored next turn.
    turn_screen = None
    for unit in player.units:
        if unit.is_worker() and unit.can_act():
            if turn_screen is None:
                turn_screen = get_screen(game_state)
            state = turn_screen
            # Select and perform an action
            action = select_action(state)
            step_reward = observation["reward"]/1000
            total_reward += step_reward
            reward = torch.tensor([step_reward], device=device)
            command, _ = get_prediction_actions(action, unit)
            if len(command) != 0:
                actions.append(command)

    # iterate through cities
    # Count workers queued this turn so we do not request more units than
    # there are city tiles; the remaining tiles research instead.
    planned_units = len(player.units)
    for city in player.cities.values():
        for citytile in city.citytiles:
            if not citytile.can_act():
                continue

            # If there is space for more units, build a worker.
            # (This policy could be reconsidered later.)
            if num_citytiles > planned_units:
                actions.append(citytile.build_worker())
                planned_units += 1

            # else research
            else:
                actions.append(citytile.research())

    return actions


In [None]:
import time
import tqdm
from IPython.display import clear_output
# Per-episode totals of the reward accumulated by agent().
t_list = []
# Progress bar over 500 training episodes.
t = tqdm.tqdm(range(500), position=0, leave=True)
for ep in t:
    # A fresh environment is created for every episode; the same seed is passed each time.
    env = make('lux_ai_2021', configuration={'seed': 562124210, 'loglevel': 2, 'annotations': True}, debug=True)
    # Play one full game of our agent against the built-in "simple_agent".
    steps = env.run([agent, "simple_agent"])
    # Sync the target network with the policy network after every episode
    # (TARGET_UPDATE is not consulted here).
    target_net.load_state_dict(policy_net.state_dict())
    t_list.append(total_reward)
    clear_output()
    # Show the episode reward divided by 360 - presumably the number of turns per game; confirm.
    t.set_description_str(str(total_reward/360))
    # Reset the accumulator that agent() adds to.
    total_reward = 0

In [None]:
# Persist the weights of both networks to files named "target" and "policy"
# (relative paths, resolved against the current working directory).
torch.save(target_net.state_dict(), "target")
torch.save(policy_net.state_dict(), "policy")

In [None]:
#print(t_list)

In [None]:
# Render the environment left in `env` by the training loop (the most recent episode).
env.render(mode='ipython', width=1200, height=1000)