# Install Dependencies

In [None]:
# IPython shell escape: install the LuxPythonEnvGym package straight from its GitHub repository.
!pip install git+https://github.com/glmcdona/LuxPythonEnvGym.git

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the '/kaggle/input' tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:


import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import gym
from gym import spaces
from gym.wrappers import FlattenObservation
import numpy as np
import random
import matplotlib.pyplot as plt
from luxai2021.game.actions import MoveAction, SpawnCityAction, TransferAction, PillageAction, Action, SpawnWorkerAction, SpawnCartAction, ResearchAction
from luxai2021.game.constants import LuxMatchConfigs_Default
from luxai2021.game.game import Game
from luxai2021.game.constants import Constants
from luxai2021.game.unit import Unit
from luxai2021.game.cell import Cell
from luxai2021.game.city import City, CityTile
from typing import Any, Callable, Optional, Sequence, Text, Union, List, Tuple
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common import results_plotter
import tensorboard as tb

# Reward Function

In [None]:

def build_worker_reward(game : Game, unit_id : str, team : int) -> float:
    """Calculate the reward for a worker.

    The reward is the sum of a constant step penalty of -1.0, the team's
    ``fuelGenerated`` statistic and 0.9 times the fuel value of the cargo
    carried by the worker (0.0 when the worker cannot be found any more).

    Parameters
    ----------
    game : Game
        The game object
    unit_id : str
        the id of the worker such as 'u_1'
    team : int
        the integer id of the team

    Returns
    -------
    float
        the float value of the reward
    """
    unit = my_get_unit(game, unit_id)
    reward_fg = game.stats['teamStats'][team]['fuelGenerated']
    # A missing worker carries nothing, so its cargo contributes 0.
    reward_cfv = 0.9 * (unit.get_cargo_fuel_value() if unit is not None else 0.0)
    return -1.0 + reward_fg + reward_cfv


# Utility Functions

In [None]:
def my_is_cell_free(cell : Cell) -> bool:
    """Tell whether a cell holds no resource, no city tile and no unit."""
    if cell.has_resource():
        return False
    if cell.is_city_tile():
        return False
    return not cell.has_units()

def my_get_citytiles(game : Game, team : int)  -> List[CityTile]  :
    """Collect the city tiles of every city owned by a team.

    Parameters
    ----------
    game : Game
        The game object
    team : int
        the integer id of the team

    Returns
    -------
    list
        the ``city_tile`` of each city cell of the team's cities, in
        iteration order of ``game.cities`` and of each city's cells
    """
    return [
        city_cell.city_tile
        for city in game.cities.values()
        if city.team == team
        for city_cell in city.city_cells
    ]
 
    
def discretize_direction(direction_char):
    """One-hot encode a direction constant.

    Parameters
    ----------
    direction_char
        one of the ``Constants.DIRECTIONS`` values

    Returns
    -------
    list
        five integers with a single 1; the slots are, in order,
        CENTER, NORTH, WEST, SOUTH, EAST

    Raises
    ------
    ValueError
        If the direction is none of the five known constants
    """
    slot_order = (
        Constants.DIRECTIONS.CENTER,
        Constants.DIRECTIONS.NORTH,
        Constants.DIRECTIONS.WEST,
        Constants.DIRECTIONS.SOUTH,
        Constants.DIRECTIONS.EAST,
    )
    for slot, candidate in enumerate(slot_order):
        if candidate == direction_char:
            one_hot = [0] * len(slot_order)
            one_hot[slot] = 1
            return one_hot
    raise ValueError("discretize_direction")
    

def get_closest_city_position_from_unit(city : City, unit : Unit):
    """Find the city cell of a city that is closest to a unit.

    Parameters
    ----------
    city : City
        The city object
    unit : Unit
        the unit object

    Returns
    -------
    tuple or None
        ``(city, closest city cell, distance to that cell)``, or None when
        the city has no city cell
    """
    nearest_cell = None
    nearest_distance = None
    for city_cell in city.city_cells:
        candidate_distance = unit.pos.distance_to(city_cell.pos)
        # Strict comparison keeps the first cell among equally distant ones.
        if nearest_cell is None or candidate_distance < nearest_distance:
            nearest_cell = city_cell
            nearest_distance = candidate_distance
    if nearest_cell is None:
        return None
    return (city, nearest_cell, nearest_distance)
        

def my_get_unit(game : Game, unit_id : str) -> Unit:
    """Look up a unit by id in the state of team 0, then of team 1.

    Parameters
    ----------
    game : Game
        The game object
    unit_id : str
        the id of a unit, such as 'u_1'

    Returns
    -------
    Unit
        the unit object, or None when neither team owns a unit with this id
    """
    team_states = game.state['teamStates']
    for team_index in (0, 1):
        team_units = team_states[team_index]['units']
        if unit_id in team_units:
            return team_units[unit_id]
    return None



# LuxAI Environment

In [None]:
class SimpleWorkerDictEnv(gym.Env):
    """Gym environment that trains a single worker of a Lux AI game.

    Each episode controls one worker of a randomly chosen team. Observations
    are the dictionaries built by ``build_worker_observation`` and actions
    are six discrete choices: stay, move north/east/south/west, build a city.
    """

    def __init__(self, configs : dict):
        """
        Parameters
        ----------
        configs : dict
            The game configurations
        """
        super().__init__()
        self._configs = configs
        self._game = None
        game_info_state = {
            'game_percent' : gym.spaces.Box(low=0.0, high=1.0, shape=(1, ), dtype=np.float32),
            'daynight' : gym.spaces.Box(low=0.0, high=1.0, shape=(2, ), dtype=np.float32),
            'coal_researched_percent' : gym.spaces.Box(low=0.0, high=1.0, shape=(1, ), dtype=np.float32),
            'uranium_researched_percent' : gym.spaces.Box(low=0.0, high=1.0, shape=(1, ), dtype=np.float32),
            'coal_researched' : gym.spaces.Box(low=0.0, high=1.0, shape=(1, ), dtype=np.float32),
            'uranium_researched' : gym.spaces.Box(low=0.0, high=1.0, shape=(1, ), dtype=np.float32),
            'can_act' : gym.spaces.Box(low=0.0, high=1.0, shape=(1, ), dtype=np.float32),
            'can_build' : gym.spaces.Box(low=0.0, high=1.0, shape=(1, ), dtype=np.float32)
            }
        observation_space = {
            'game_info_state' : gym.spaces.Dict(game_info_state),
            'closest_wood1' : gym.spaces.Box(low=-1.0, high=1.0, shape=(7, ), dtype=np.float32),
            'closest_citytile1' : gym.spaces.Box(low=-1.0, high=1.0, shape=(9, ), dtype=np.float32),
            'closest_free_cell1' : gym.spaces.Box(low=-1.0, high=1.0, shape=(6, ), dtype=np.float32)
                }
        self.observation_space = gym.spaces.Dict(observation_space)
        self.action_space = spaces.Discrete(6)
        # Start in the "ended" state so that the first reset() creates a game.
        self._episode_ended = True
        self._worker_being_trained = None


    def scalar_to_action_worker(self, worker : Unit, action : int ) -> Action :
        """Convert an integer action into an Action object for a worker.

        Parameters
        ----------
        worker : Unit
            the worker that performs the action
        action : int32
            0 = stay, 1 = north, 2 = east, 3 = south, 4 = west, 5 = build city

        Returns
        -------
        Action
            an Action object, or None when the worker is no longer in the
            game or when the action is 0 (stay)

        Raises
        ------
        ValueError
            If the int action is not supported
        """
        unit_id = worker.id
        team = worker.team
        # A worker that disappeared from the game state cannot act any more.
        if my_get_unit(self._game, unit_id) is None:
            return None
        # dont move
        if action == 0:
            return None
        # move north
        elif action == 1:
            return MoveAction(team, unit_id, Constants.DIRECTIONS.NORTH)
        # move east
        elif action == 2:
            return MoveAction(team, unit_id, Constants.DIRECTIONS.EAST)
        # move south
        elif action == 3:
            return MoveAction(team, unit_id, Constants.DIRECTIONS.SOUTH)
        # move west
        elif action == 4:
            return MoveAction(team, unit_id, Constants.DIRECTIONS.WEST)
        # build city
        elif action == 5:
            return SpawnCityAction(team, unit_id)
        else:
            raise ValueError(f"invalid action {action}")

    def reset(self):
        """Reset the environment and pick the worker to train.

        A new game is created when there is none yet, when the match is over
        or when the previous episode ended. A team is then drawn at random
        and one of its workers is chosen at random.

        Returns
        -------
        dict
            the first observation of the chosen worker

        Raises
        ------
        ValueError
            If a unit is neither a worker nor a cart, if no worker is
            available for training, or if the observation does not fit the
            observation space
        """
        if self._game is None or self._game.match_over() or self._episode_ended:
            self._game = Game(self._configs)
        self._episode_ended = False
        self._team = random.sample([0, 1], 1)[0]
        self._other = 0 if self._team == 1 else 1

        all_unit_ids_team = list(self._game.state["teamStates"][self._team]["units"].keys())

        all_workers_team = []
        for unit_id_team in all_unit_ids_team:
            unit = my_get_unit(self._game, unit_id_team)
            if unit.is_worker():
                all_workers_team.append(unit)
            elif not unit.is_cart():
                # Carts are simply skipped: only workers are trained here.
                raise ValueError("_reset all_unit_ids_team")

        if len(all_workers_team) == 0:
            raise ValueError("reset no worker available")

        self._worker_being_trained = np.random.choice(all_workers_team)
        wob = build_worker_observation(self._game, self._configs, self._worker_being_trained)
        if not self.observation_space.contains(wob):
            raise ValueError(f"SimpleWorkerDictEnv.build_worker_observation {wob}")
        return wob


    def step(self, action : int) -> Tuple[dict, np.float32, bool, dict]:
        """Perform one step: apply the action and observe the resulting state.

        Parameters
        ----------
        action : int32
            the action

        Returns
        -------
        Tuple
            a Tuple object containing a new observation, the reward, a boolean
            "is the episode done" (the turn result reported by the game, or
            True when the trained worker can no longer be found), and an
            empty dict reserved for debug info

        Raises
        ------
        ValueError
            If the action is not in the action space or if the observation
            does not fit the observation space
        """
        if not self.action_space.contains(action):
            raise ValueError(f"SimpleWorkerDictEnv._step {action}")

        action = self.scalar_to_action_worker(self._worker_being_trained,
                                              action)
        # NOTE(review): a None action (stay, or worker gone) is forwarded as-is;
        # this assumes the game engine ignores None entries - TODO confirm.
        actions = [ action ]
        self._episode_ended = self._game.run_turn_with_actions(actions)
        observation = build_worker_observation(self._game, self._configs, self._worker_being_trained)
        if not self.observation_space.contains(observation):
            raise ValueError(f"SimpleWorkerDictEnv.build_worker_observation {observation}")
        self._last_reward = build_worker_reward(self._game, self._worker_being_trained.id, self._worker_being_trained.team)
        # The episode also ends when the trained worker left the game state.
        if my_get_unit(self._game, self._worker_being_trained.id) is None:
            self._episode_ended = True
        return (observation, self._last_reward, self._episode_ended, {})


# Build Observation

In [None]:
def build_worker_observation(game : Game, configs : dict, worker : Unit):
    """Assemble the observation dictionary of a worker.

    Parameters
    ----------
    game : Game
        The game object
    configs : dict
        the configs of the game
    worker : Unit
        the unit object

    Returns
    -------
    dict
        the game info state plus the observation of the closest wood cell,
        of the closest city of the worker's team and of the closest free cell
    """
    info_state = build_worker_observation_game_info_state(game, configs, worker)

    # Several candidates are computed each time, but only the nearest is kept.
    nearest_wood = build_worker_observation_closest_resources(
        game, configs, worker, Constants.RESOURCE_TYPES.WOOD, 5)[0]
    nearest_citytile = build_worker_observation_citytiles(
        game, worker, worker.team, 3)[0]
    nearest_free_cell = build_worker_observation_free_cells(game, worker, 3)[0]

    return {
        'game_info_state' : info_state,
        'closest_wood1' : nearest_wood,
        'closest_citytile1' : nearest_citytile,
        'closest_free_cell1' : nearest_free_cell,
    }


## Build State observation

In [None]:
def build_worker_observation_game_info_state(game : Game, configs : dict, worker : Unit):
    """Build the global game information part of a worker observation.

    Parameters
    ----------
    game : Game
        The game object
    configs : dict
        the configs of the game
    worker : Unit
        the unit object

    Returns
    -------
    dict
        lists of numbers for: progress of the game, a day/night one-hot pair,
        research progress towards coal and uranium, whether coal and uranium
        are researched, and whether the worker can act and can build.
        All ratios are clamped to [0, 1].
    """
    def _clamp01(ratio):
        # The observation space declares these entries as Box(0, 1); research
        # points divided by a requirement would otherwise be able to exceed 1.
        return min(1.0, max(0.0, ratio))

    parameters = configs['parameters']
    requirements = parameters['RESEARCH_REQUIREMENTS']
    team_state = game.state['teamStates'][worker.team]
    research_points = team_state['researchPoints']
    researched = team_state['researched']
    is_night = game.is_night()

    gif = {
        'game_percent' : [ _clamp01(game.state['turn'] / parameters['MAX_DAYS']) ],
        'daynight' : [ 0 if is_night else 1, 1 if is_night else 0 ],
        'coal_researched_percent' : [ _clamp01(research_points / requirements['COAL']) ],
        'uranium_researched_percent' : [ _clamp01(research_points / requirements['URANIUM']) ],
        'coal_researched' : [ 1 if researched['coal'] else 0 ],
        'uranium_researched' : [ 1 if researched['uranium'] else 0 ],
        'can_act' : [ 1 if worker.can_act() else 0 ],
        'can_build' : [ 1 if worker.can_build(game.map) else 0 ]
        }
    return gif


## Build Free Cells Observations

In [None]:
def build_worker_observation_free_cell(unit, cellinfo, free_cell_distance_mean, free_cell_distance_max):
    """Encode one free cell as seen from a unit.

    Parameters
    ----------
    unit
        the observing unit
    cellinfo
        a sequence whose first item is the cell, or None when there is none
    free_cell_distance_mean, free_cell_distance_max
        offset and scale used to normalise the distance

    Returns
    -------
    np.ndarray
        six float32 values: the one-hot direction to the cell followed by the
        normalised distance; all -1 when ``cellinfo`` is None
    """
    if cellinfo is None:
        return np.full(6, -1, dtype=np.float32)
    target_pos = cellinfo[0].pos
    heading = unit.pos.direction_to(target_pos)
    raw_distance = unit.pos.distance_to(target_pos)
    heading_one_hot = discretize_direction(heading)
    scaled_distance = (raw_distance - free_cell_distance_mean) / free_cell_distance_max
    return np.array(heading_one_hot + [scaled_distance], dtype=np.float32)


def build_worker_observation_free_cells(game : Game, unit : Unit, nbcells : int):
    """Build the observations of the free cells closest to a unit.

    Parameters
    ----------
    game : Game
        The game object
    unit : Unit
        the observing unit
    nbcells : int
        the number of observations to return

    Returns
    -------
    list
        ``nbcells`` arrays ordered by increasing distance; missing cells
        (fewer free cells than requested, possibly none) are encoded by
        ``build_worker_observation_free_cell`` with ``None``
    """
    free_cell_distance_max = np.sqrt(game.map.width**2 + game.map.height**2)
    free_cell_distance_mean = free_cell_distance_max / 2

    candidates = []
    for x in range(game.map.width):
        for y in range(game.map.height):
            cell = game.map.get_cell(x, y)
            if my_is_cell_free(cell):
                candidates.append((cell, cell.pos.distance_to(unit.pos)))
    # A plain stable sort also works when the list is empty, unlike slicing a
    # column out of an empty numpy array.
    candidates.sort(key=lambda info: info[1])

    closest = candidates[:nbcells]
    closest += [None] * (nbcells - len(closest))
    return [
        build_worker_observation_free_cell(unit, cellinfo,
                                           free_cell_distance_mean, free_cell_distance_max)
        for cellinfo in closest
    ]
    

## Build Resources Observations

In [None]:
def build_worker_observation_closest_resource_cell(unit, resource_cell, 
                                                   distance_mean, distance_max,
                                                   res_amount_mean, res_amount_max):
    """Encode one resource cell as seen from a unit.

    Parameters
    ----------
    unit
        the observing unit
    resource_cell
        the cell holding the resource, or None when there is none
    distance_mean, distance_max
        offset and scale used to normalise the distance
    res_amount_mean, res_amount_max
        offset and scale used to normalise the resource amount

    Returns
    -------
    np.ndarray
        seven float32 values: the one-hot direction to the cell, the
        normalised distance and the normalised amount; all -1 when
        ``resource_cell`` is None
    """
    if resource_cell is None:
        return np.full(7, -1, dtype=np.float32)
    target_pos = resource_cell.pos
    heading = unit.pos.direction_to(target_pos)
    raw_distance = unit.pos.distance_to(target_pos)
    raw_amount = resource_cell.resource.amount

    heading_one_hot = discretize_direction(heading)
    scaled = [
        (raw_distance - distance_mean) / distance_max,
        (raw_amount - res_amount_mean) / res_amount_max,
    ]
    return np.array(heading_one_hot + scaled, dtype=np.float32)
  

    
def build_worker_observation_closest_resources(game, configs, unit, resource_type, nbresources):
    """Build the observations of the closest cells holding a resource type.

    Parameters
    ----------
    game
        The game object
    configs
        the configs of the game
    unit
        the observing unit
    resource_type
        one of the WOOD, COAL or URANIUM ``Constants.RESOURCE_TYPES`` values
    nbresources
        the number of observations to return

    Returns
    -------
    list
        ``nbresources`` arrays ordered by increasing distance; missing cells
        (fewer resource cells than requested, possibly none) are encoded by
        ``build_worker_observation_closest_resource_cell`` with ``None``

    Raises
    ------
    ValueError
        If the resource type is not supported
    """
    if resource_type == Constants.RESOURCE_TYPES.WOOD:
        max_amount = configs["parameters"]["MAX_WOOD_AMOUNT"]
    elif resource_type == Constants.RESOURCE_TYPES.COAL:
        max_amount = configs["parameters"]["MAX_COAL_AMOUNT"]
    elif resource_type == Constants.RESOURCE_TYPES.URANIUM:
        max_amount = configs["parameters"]["MAX_URANIUM_AMOUNT"]
    else:
        raise ValueError("build_worker_observation_resource")
    # The scale is 1.1 times the configured maximum amount.
    amount_mean = max_amount / 2 * 1.1
    amount_max = max_amount * 1.1

    distance_max = np.sqrt(game.map.width**2 + game.map.height**2)
    distance_mean = distance_max / 2

    resource_cells = game.map.resources_by_type[resource_type]
    # A plain stable sort also works when no cell is left, unlike slicing a
    # column out of an empty numpy array.
    closest = sorted(resource_cells,
                     key=lambda cell: unit.pos.distance_to(cell.pos))[:nbresources]
    closest += [None] * (nbresources - len(closest))
    return [
        build_worker_observation_closest_resource_cell(unit, resource_cell,
                                                       distance_mean, distance_max,
                                                       amount_mean, amount_max)
        for resource_cell in closest
    ]
    
   
    


## Build CityTile Observations

In [None]:
def build_worker_observation_citytile(unit, cityinfo, 
                                        city_fuel_mean, city_fuel_max, 
                                        city_upkeep_mean, city_upkeep_max,
                                        city_size_mean, city_size_max,
                                        city_distance_mean, city_distance_max):
    """Encode one city as seen from a unit.

    Parameters
    ----------
    unit
        the observing unit
    cityinfo
        a sequence ``(city, closest city cell, distance)``, or None when
        there is no city
    city_fuel_mean, city_fuel_max
        offset and scale used to normalise the fuel of the city
    city_upkeep_mean, city_upkeep_max
        offset and scale used to normalise the light upkeep of the city
    city_size_mean, city_size_max
        offset and scale used to normalise the number of city cells
    city_distance_mean, city_distance_max
        offset and scale used to normalise the distance

    Returns
    -------
    np.ndarray
        nine float32 values: the one-hot direction to the closest city cell,
        then the normalised distance, fuel, upkeep and city size, clipped to
        [-1, 1]; all -1 when ``cityinfo`` is None
    """
    if cityinfo is None:
        return np.array([-1] * 9, dtype=np.float32)
    city, closest_cell = cityinfo[0], cityinfo[1]
    direction = unit.pos.direction_to(closest_cell.pos)
    distance = unit.pos.distance_to(closest_cell.pos)
    fuel = city.fuel
    upkeep = city.get_light_upkeep()
    city_size = len(city.city_cells)

    direction_discrete = discretize_direction(direction)
    norm_distance = (distance - city_distance_mean) / city_distance_max
    norm_upkeep = (upkeep - city_upkeep_mean) / city_upkeep_max
    # Fuel is centred on the fuel mean (not on the city size mean).
    norm_fuel = (fuel - city_fuel_mean) / city_fuel_max
    norm_city_size = (city_size - city_size_mean) / city_size_max

    woc = np.array(direction_discrete + [norm_distance, norm_fuel, norm_upkeep, norm_city_size ], dtype=np.float32)
    # Large cities can exceed the scales; keep values within the [-1, 1]
    # bounds declared for this entry of the observation space.
    np.clip(woc, -1.0, 1.0, out=woc)
    return woc


def build_worker_observation_citytiles(game : Game, unit : Unit, team_id : int, nbcities : int):
    """Build the observations of the cities of a team closest to a unit.

    Parameters
    ----------
    game : Game
        The game object
    unit : Unit
        the observing unit
    team_id : int
        the integer id of the team owning the cities
    nbcities : int
        the number of observations to return

    Returns
    -------
    list
        ``nbcities`` arrays ordered by increasing distance to the closest
        cell of each city; missing cities are encoded as nine -1 values
    """
    city_fuel_mean = 0
    city_fuel_max = 10000
    city_upkeep_mean = 0
    city_upkeep_max = 200
    city_size_mean = 1
    city_size_max = 10
    city_distance_max = np.sqrt(game.map.width**2 + game.map.height**2)
    city_distance_mean = city_distance_max / 2

    candidates = []
    for city in game.cities.values():
        if city.team != team_id:
            continue
        cityinfo = get_closest_city_position_from_unit(city, unit)
        # A city without any cell yields None and cannot be ranked.
        if cityinfo is not None:
            candidates.append(cityinfo)
    if len(candidates) == 0:
        return [ np.array([-1.0] * 9, np.float32) for _ in range(nbcities) ]
    candidates.sort(key=lambda info: info[2])

    closest = candidates[:nbcities]
    closest += [None] * (nbcities - len(closest))
    return [
        build_worker_observation_citytile(unit, cityinfo,
                                          city_fuel_mean, city_fuel_max,
                                          city_upkeep_mean, city_upkeep_max,
                                          city_size_mean, city_size_max,
                                          city_distance_mean, city_distance_max)
        for cityinfo in closest
    ]


# Train the model

In [None]:
swde = SimpleWorkerDictEnv(LuxMatchConfigs_Default)
mswde = Monitor(swde, './logdir')
# Each of the 4 vectorised slots must own its environment: make_vec_env
# therefore receives the environment class and builds one instance per slot
# (passing a ready-made instance through env_kwargs would share a single game
# between all slots). make_vec_env wraps every instance in its own Monitor,
# writing to the current directory, then applies FlattenObservation.
env = make_vec_env(SimpleWorkerDictEnv,
                   n_envs=4,
                   env_kwargs={'configs' : LuxMatchConfigs_Default},
                   monitor_dir='.',
                   wrapper_class=FlattenObservation)
model = PPO("MlpPolicy", env, verbose=1, device="cpu", tensorboard_log="./tensorboard/")
# NOTE(review): n_eval_episodes is presumably only meaningful together with an
# evaluation environment - confirm against the installed stable-baselines3.
model.learn(total_timesteps=int(1e6), n_eval_episodes = 360)
model.save("best_ppo_worker_py37_20211103_001")
print("done.")

# Plot the training steps

In [None]:
def moving_average(values, window):
    """
    Smooth values with a uniform moving average; only positions where the
    window fully overlaps the values are returned

    :param values: (numpy array)
    :param window: (int)
    :return: (numpy array)
    """
    kernel = np.ones(window) / window
    return np.convolve(values, kernel, mode='valid')


def plot_results(log_folder, title='Learning Curve'):
    """
    Plot the rewards loaded from a log folder against the number of
    timesteps, smoothed with a moving average over 50 values

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    timesteps, rewards = ts2xy(load_results(log_folder), 'timesteps')
    smoothed = moving_average(rewards, window=50)
    # Smoothing shortens the series: keep the trailing timesteps so both match
    timesteps = timesteps[len(timesteps) - len(smoothed):]

    plt.figure(title)
    plt.plot(timesteps, smoothed)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    plt.show()

In [None]:
# Plot the smoothed learning curve from the results found in the current directory.
plot_results(".")

# Evaluate the model

In [None]:
# Build a fresh evaluation environment: the dictionary observations are
# flattened, then the environment is wrapped in a Monitor given './logdir'.
eval_swde = SimpleWorkerDictEnv(LuxMatchConfigs_Default)
eval_swfe = FlattenObservation(eval_swde)
eval_menv = Monitor(eval_swfe, './logdir')

# Reload the model saved by the training cell and evaluate it over 20 episodes.
model2 = PPO.load("best_ppo_worker_py37_20211103_001")
mean_reward, std_reward = evaluate_policy(model2, eval_menv, n_eval_episodes=20)
print(f'Mean reward: {mean_reward:,.0f} +/- {std_reward:,.0f}')