In [1]:
import re
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import GRPOConfig, GRPOTrainer
import luxai_s3
from luxai_s3.wrappers import LuxAIS3GymEnv, RecordEpisode
from luxai_s3.params import EnvParams
import numpy as np
#from stable_baselines3 import PPO
#import gymnasium as gym
#import gym

INFO 02-10 22:51:49 __init__.py:190] Automatically detected platform cuda.


In [2]:
from lux.utils import direction_to
import sys
import numpy as np

# Helper functions
def manhattan_distance(pos1, pos2):
    """Return the Manhattan (L1) distance between two 2D positions.

    Only components 0 and 1 of each position are read.
    """
    dx = abs(pos1[0] - pos2[0])
    dy = abs(pos1[1] - pos2[1])
    return dx + dy

def absolute_distance(pos1, pos2):
    """Return the Chebyshev distance between two 2D positions.

    This is the larger of the per-axis absolute differences, i.e. the side
    length of the square "ring" around pos1 on which pos2 lies.
    """
    dx = abs(pos1[0] - pos2[0])
    dy = abs(pos1[1] - pos2[1])
    return dy if dy > dx else dx

def find_opposite_corner_coords(array, row, col):
    """
    Mirror a coordinate through the centre of a 2D grid.

    The grid is only used for its dimensions: the first index is reflected
    against ``len(array)`` and the second against ``len(array[0])``. An empty
    outer sequence is treated as having zero columns.

    :param array: 2D list or NumPy array
    :param row: First index of the given point
    :param col: Second index of the given point
    :return: (row', col') - the point-mirrored coordinates
    """
    height = len(array)
    width = 0 if height == 0 else len(array[0])
    return (height - 1 - row, width - 1 - col)


# Classes

class TileType:
    """Integer codes used for the kind of a map tile (-1 marks an unknown tile)."""
    unknown, empty, asteroid, nebula = -1, 0, 1, 2

class ActionType:
    """Integer codes for the action a unit can submit (0-4 move/stay, 5 sap)."""
    center, up, right, down, left, sap = range(6)

class Tile:
    """Bookkeeping record for a single map cell.

    A new tile starts as ``TileType.unknown`` with no energy reading and every
    status flag cleared; callers are expected to update the attributes as the
    map is observed.
    """

    def __init__(self, x, y):
        """Create an unexplored tile at grid position (x, y)."""
        self.x = x
        self.y = y
        self.type = TileType.unknown
        self.energy = None
        self.is_visible = False
        self.is_relic = False
        self.is_reward = False
        self.is_explored = False
        self.is_explored_for_relic = False
        self.is_explored_for_reward = False

    @property
    def coordinates(self):
        """The tile position as an ``(x, y)`` tuple."""
        return (self.x, self.y)

    @property
    def is_walkable(self):
        """True unless the tile is currently typed as an asteroid.

        Unknown tiles count as walkable.
        """
        # The original compared against `NodeType.asteroid`, a name that is not
        # defined anywhere in this file, so reading the property raised NameError.
        return self.type != TileType.asteroid


#class Fleet:
#    def __init__(self, 






class Agent():
    """Scripted agent that also renders every observation as text for an LLM.

    ``act`` produces the per-unit action array (random exploration plus
    focused sapping of visible enemies) and, as a side effect, stores a
    plain-text description of the configuration and observation in
    ``self.llm_input``.

    Attributes set in ``__init__``:
        player / enemy_player: "player_0" or "player_1".
        team_id / enemy_team_id: 0 or 1, used to index per-team observations.
        env_cfg: the configuration mapping passed by the caller.
        unit_sap_range / unit_sap_cost: copied from ``env_cfg``.
        map_height / map_width: copied from ``env_cfg``.
        my_spawn_location / enemy_spawn_location: None until the first unit appears.
        map_explored_status: int grid, 1 where a tile type has been observed.
        relic_node_positions / discovered_relic_nodes_ids: relic nodes seen so far.
        unit_explore_locations: current random exploration target per unit id.
        llm_input: prompt text produced by the latest ``act`` call (None before).
    """

    def __init__(self, player: str, env_cfg) -> None:
        """Initialise per-game state.

        :param player: "player_0" or "player_1"; anything else is treated as player 1.
        :param env_cfg: mapping with at least "map_height", "map_width",
            "unit_sap_range" and "unit_sap_cost" (``act``/``prep_llm_input``
            read further keys from it).
        """
        self.player = player
        self.enemy_player = "player_1" if self.player == "player_0" else "player_0"
        self.team_id = 0 if self.player == "player_0" else 1
        self.enemy_team_id = 1 if self.team_id == 0 else 0
        #np.random.seed(0)
        self.env_cfg = env_cfg
        # act() needs both values to plan sap attacks; they were previously
        # never assigned, which made act() raise AttributeError as soon as an
        # enemy unit and a friendly unit were visible at the same time.
        self.unit_sap_range = env_cfg["unit_sap_range"]
        self.unit_sap_cost = env_cfg["unit_sap_cost"]
        self.map_height = env_cfg["map_height"]
        self.map_width = env_cfg["map_width"]
        self.my_spawn_location = None
        self.enemy_spawn_location = None
        self.first_spawn = False
        self.llm_input = None

        self.map_explored_status = np.zeros((self.map_height, self.map_width), dtype=int)

        self.relic_node_positions = []
        self.discovered_relic_nodes_ids = set()
        self.unit_explore_locations = dict()

    def prep_llm_input(self, env_cfg, obs):
        """Render the configuration and one observation as a single line of text.

        Each fact becomes one short sentence; per-unit, per-row and per-relic
        facts are joined with single spaces, and the resulting sections are
        joined with single spaces again.

        :param env_cfg: mapping holding the configuration keys quoted below.
        :param obs: observation mapping with "units" ("position", "energy"),
            "units_mask", "sensor_mask", "map_features" ("energy", "tile_type"),
            "relic_nodes", "relic_nodes_mask", "team_points", "team_wins",
            "steps" and "match_steps". Per-team entries are indexed by team id.
        :return: the prompt text.
        """
        def _row_text(row):
            # str() of an array row looks like "[0 1 2]"; drop the brackets.
            # Done in a helper rather than inline because reusing double quotes
            # inside a double-quoted f-string is a SyntaxError before Python 3.12.
            return str(row).replace("[", "").replace("]", "")

        me, enemy = self.team_id, self.enemy_team_id
        units = obs['units']
        map_features = obs['map_features']
        relic_nodes = obs['relic_nodes']

        if self.enemy_spawn_location is None:
            enemy_spawn_location_warning = "Enemy spawn location: not yet discovered."
        else:
            enemy_spawn_location_warning = f"Enemy spawn location: {self.enemy_spawn_location[0]}, {self.enemy_spawn_location[1]}."

        sections = [
            ### env_cfg information
            f"Maximum possible number of units: {env_cfg['max_units']}.",
            f"Number of matches per game: {env_cfg['match_count_per_episode']}.",
            f"Number of steps per match: {env_cfg['max_steps_in_match']}.",
            f"Map height: {env_cfg['map_height']}.",
            f"Map width: {env_cfg['map_width']}.",
            f"Number of teams: {env_cfg['num_teams']}.",
            f"Unit move energy cost: {env_cfg['unit_move_cost']}.",
            f"Unit sap energy cost: {env_cfg['unit_sap_cost']}.",
            f"Unit sap range: {env_cfg['unit_sap_range']}.",
            f"Unit sensor range: {env_cfg['unit_sensor_range']}.",

            ### obs information
            "Unit position: -1, -1 means the unit is not spawned yet or not visible.",
            # unit positions
            " ".join(f"My unit {i} position: {pos[0]}, {pos[1]}." for i, pos in enumerate(units['position'][me])),
            " ".join(f"Enemy unit {i} position: {pos[0]}, {pos[1]}." for i, pos in enumerate(units['position'][enemy])),
            # unit energys
            " ".join(f"My unit {i} energy: {energy}." for i, energy in enumerate(units['energy'][me])),
            " ".join(f"Enemy unit {i} energy: {energy}." for i, energy in enumerate(units['energy'][enemy])),
            # unit masks
            " ".join(f"My unit {i} visibility: {mask}." for i, mask in enumerate(obs['units_mask'][me])),
            " ".join(f"Enemy unit {i} visibility: {mask}." for i, mask in enumerate(obs['units_mask'][enemy])),
            # sensor mask and map features, one sentence per row
            " ".join(f"Sensor mask row {i}: {_row_text(row)}." for i, row in enumerate(obs['sensor_mask'])),
            " ".join(f"Map energy row {i}: {_row_text(row)}." for i, row in enumerate(map_features['energy'])),
            " ".join(f"Map node type row {i}: {_row_text(row)}." for i, row in enumerate(map_features['tile_type'])),
            # relic nodes and their masks
            "Relic node position: -1, -1 means the relic node is not yet discoverd.",
            " ".join(f"Relic node {i} position: {node[0]}, {node[1]}." for i, node in enumerate(relic_nodes)),
            " ".join(f"Relic node {i} visibility: {mask}." for i, mask in enumerate(obs['relic_nodes_mask'])),
            # team points
            f"My current point for this match is: {obs['team_points'][me]}.",
            f"Enemy current point for this match is: {obs['team_points'][enemy]}.",
            # team wins
            f"I have won {obs['team_wins'][me]} matches.",
            f"Enemy has won {obs['team_wins'][enemy]} matches.",
            # steps / match_steps
            f"This is step {obs['steps']} of the game.",
            f"This is step {obs['match_steps']} of the match.",
            enemy_spawn_location_warning,
        ]

        return " ".join(sections)

    def act(self, step: int, obs, remainingOverageTime: int = 60):
        """Decide what action to send to each available unit.

        step is the current timestep number of the game starting from 0 going up to max_steps_in_match * match_count_per_episode - 1.

        Strategy: every available unit walks towards a random map location that
        is re-drawn whenever ``step`` is a multiple of 20; units that can
        jointly drain a visible enemy (enough combined sap cost, within sap
        range, enough own energy) sap that enemy instead. Newly visible relic
        nodes are remembered for the rest of the game.

        Side effects: updates the explored map, spawn locations, relic
        bookkeeping and exploration targets, and stores the prompt text for
        this observation in ``self.llm_input``.

        :param step: current game step.
        :param obs: this player's observation (see ``prep_llm_input``).
        :param remainingOverageTime: accepted for interface compatibility; unused.
        :return: int array of shape (max_units, 3) holding
            ``[action, dx, dy]`` per unit slot (all zeros for idle slots).
        """
        # units
        unit_positions = np.array(obs["units"]["position"][self.team_id]) # shape (max_units, 2)
        enemy_unit_positions = np.array(obs["units"]["position"][self.enemy_team_id]) # shape (max_units, 2)

        unit_energys = np.array(obs["units"]["energy"][self.team_id])
        enemy_unit_energys = np.array(obs["units"]["energy"][self.enemy_team_id])

        # units_mask
        unit_mask = np.array(obs["units_mask"][self.team_id]) # shape (max_units, )
        enemy_unit_mask = np.array(obs["units_mask"][self.enemy_team_id]) # shape (max_units, )

        # update map explored status: any tile whose type is not -1 has been seen
        current_map_tile_type = np.asarray(obs['map_features']['tile_type'])
        self.map_explored_status[current_map_tile_type != -1] = 1

        observed_relic_node_positions = np.array(obs["relic_nodes"]) # shape (max_relic_nodes, 2)
        observed_relic_nodes_mask = np.array(obs["relic_nodes_mask"]) # shape (max_relic_nodes, )

        # ids of units you can control at this timestep
        available_unit_ids = np.where(unit_mask)[0]
        enemy_available_unit_ids = np.where(enemy_unit_mask)[0]

        # The first unit ever seen marks our spawn; the enemy spawn is taken to
        # be the point-mirrored location on the map.
        if available_unit_ids.shape[0] > 0 and not self.first_spawn:
            first_unit_pos = unit_positions[available_unit_ids[0]]
            self.my_spawn_location = (first_unit_pos[0], first_unit_pos[1])
            self.enemy_spawn_location = find_opposite_corner_coords(self.map_explored_status, first_unit_pos[0], first_unit_pos[1])
            self.first_spawn = True

        actions = np.zeros((self.env_cfg["max_units"], 3), dtype=int)

        # save any new relic nodes that we discover for the rest of the game.
        for relic_id in np.where(observed_relic_nodes_mask)[0]:
            if relic_id not in self.discovered_relic_nodes_ids:
                self.discovered_relic_nodes_ids.add(relic_id)
                self.relic_node_positions.append(observed_relic_node_positions[relic_id])

        # combat planning: for every visible enemy, greedily collect units that
        # can sap it until their combined sap cost covers the enemy's energy.
        committed_units = set()   # units already assigned to some enemy
        firing_solutions = {}     # enemy unit id -> list of attacking unit ids
        for enemy_unit_id in enemy_available_unit_ids:
            enemy_unit_pos = enemy_unit_positions[enemy_unit_id]
            enemy_unit_energy = enemy_unit_energys[enemy_unit_id]
            if enemy_unit_energy < 0:
                continue

            group = []
            group_damage = 0
            for unit_id in available_unit_ids:
                if group_damage >= enemy_unit_energy:
                    break  # enough firepower collected
                if unit_id in committed_units:
                    continue
                if unit_energys[unit_id] < self.unit_sap_cost:
                    continue
                if absolute_distance(unit_positions[unit_id], enemy_unit_pos) > self.unit_sap_range:
                    continue
                group_damage += self.unit_sap_cost
                group.append(unit_id)

            # Only commit the group when it can actually drain the enemy.
            attack_group = group if group_damage >= enemy_unit_energy else []
            committed_units.update(attack_group)
            firing_solutions[enemy_unit_id] = attack_group

        # unit ids range from 0 to max_units - 1
        for unit_id in available_unit_ids:
            # randomly explore by picking a random location on the map and moving there for about 20 steps
            if step % 20 == 0 or unit_id not in self.unit_explore_locations:
                rand_loc = (np.random.randint(0, self.env_cfg["map_width"]), np.random.randint(0, self.env_cfg["map_height"]))
                self.unit_explore_locations[unit_id] = rand_loc
            actions[unit_id] = [direction_to(unit_positions[unit_id], self.unit_explore_locations[unit_id]), 0, 0]

        # Attackers override their exploration move with a sap (action 5)
        # aimed at the enemy, expressed as an offset from the attacker.
        for enemy_unit_id, attack_group in firing_solutions.items():
            enemy_unit_pos = enemy_unit_positions[enemy_unit_id]
            for attack_unit_id in attack_group:
                attack_unit_pos = unit_positions[attack_unit_id]
                dx = enemy_unit_pos[0] - attack_unit_pos[0]
                dy = enemy_unit_pos[1] - attack_unit_pos[1]
                actions[attack_unit_id] = [5, dx, dy]

        self.llm_input = self.prep_llm_input(self.env_cfg, obs)

        return actions

In [3]:
# Build the game environment: a LuxAIS3GymEnv (numpy_output=True) wrapped in
# RecordEpisode. NOTE(review): presumably the wrapper records episodes for
# replay — confirm in luxai_s3.wrappers.
env = RecordEpisode(
    LuxAIS3GymEnv(numpy_output=True)
)



In [4]:
# Start a new game. obs_all is indexed by player name ("player_0", ...) and
# info['params'] is the configuration handed to the agents below.
obs_all, info = env.reset()

In [5]:
# One scripted agent per side, both configured from the same env parameters.
agent1 = Agent("player_0", info['params'])
agent2 = Agent("player_1", info['params'])

In [6]:
obs_all['player_0']['steps']

array(0, dtype=int32)

In [7]:
obs_all['player_0']['match_steps']

array(0, dtype=int32)

In [8]:
# Run one decision step for player_0; besides returning the action array this
# also fills agent1.llm_input with the text rendering of the observation.
actions1 = agent1.act(obs_all['player_0']['steps'], obs_all['player_0'])

In [9]:
agent1.llm_input

'Maximum possible number of units: 16. Number of matches per game: 5. Number of steps per match: 100. Map height: 24. Map width: 24. Number of teams: 2. Unit move energy cost: 4. Unit sap energy cost: 45. Unit sap range: 3. Unit sensor range: 2. Unit position: -1, -1 means the unit is not spawned yet or not visible. My unit 0 position: -1, -1. My unit 1 position: -1, -1. My unit 2 position: -1, -1. My unit 3 position: -1, -1. My unit 4 position: -1, -1. My unit 5 position: -1, -1. My unit 6 position: -1, -1. My unit 7 position: -1, -1. My unit 8 position: -1, -1. My unit 9 position: -1, -1. My unit 10 position: -1, -1. My unit 11 position: -1, -1. My unit 12 position: -1, -1. My unit 13 position: -1, -1. My unit 14 position: -1, -1. My unit 15 position: -1, -1. Enemy unit 0 position: -1, -1. Enemy unit 1 position: -1, -1. Enemy unit 2 position: -1, -1. Enemy unit 3 position: -1, -1. Enemy unit 4 position: -1, -1. Enemy unit 5 position: -1, -1. Enemy unit 6 position: -1, -1. Enemy unit 

In [10]:
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Load the tokenizer that belongs to the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the EOS token as the padding token; generate() is later called with
# pad_token_id=tokenizer.pad_token_id.
tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): every quantization option below is commented out and the
# quantization_config argument of from_pretrained is commented out as well, so
# bnb_config is built without arguments and is not used in this cell.
bnb_config = BitsAndBytesConfig(
    #load_in_4bit=True,
    #bnb_8bit_compute_dtype=torch.bfloat16,
    #bnb_4bit_quant_type="nf4",
    #bnb_4bit_use_double_quant=True  # ✅ Add nested quantization for better memory usage
)

# Load the model weights in bfloat16 (no quantization is applied here).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",  # Let Accelerate handle device placement
    #quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)

In [11]:
#max_new_tokens = 1024

# Tokenize the observation text produced by agent1.act() and move the
# resulting tensors to the device the model lives on.
prompt = agent1.llm_input
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

In [14]:
# Generate a continuation of the prompt.
# - max_new_tokens is set to the prompt length in tokens, so the budget for
#   new tokens grows with the prompt.
# - NOTE(review): `temperature` only matters when sampling is enabled and
#   do_sample is commented out here — confirm the model's default generation
#   config turns sampling on, otherwise the value is ignored.
generated_output = model.generate(
    **inputs,
    max_new_tokens=inputs['input_ids'].shape[1],
    pad_token_id=tokenizer.pad_token_id,
    #do_sample=True,
    temperature=0.6,
    #top_k=50,
    #top_p=0.9,
    #use_cache=True  # ✅ Enable KV caching for faster generation
)

# Decode the first returned sequence. The decoded text starts with the prompt
# itself (see the printed output), followed by whatever the model generated.
response = tokenizer.decode(generated_output[0], skip_special_tokens=True)
print(response)

Maximum possible number of units: 16. Number of matches per game: 5. Number of steps per match: 100. Map height: 24. Map width: 24. Number of teams: 2. Unit move energy cost: 4. Unit sap energy cost: 45. Unit sap range: 3. Unit sensor range: 2. Unit position: -1, -1 means the unit is not spawned yet or not visible. My unit 0 position: -1, -1. My unit 1 position: -1, -1. My unit 2 position: -1, -1. My unit 3 position: -1, -1. My unit 4 position: -1, -1. My unit 5 position: -1, -1. My unit 6 position: -1, -1. My unit 7 position: -1, -1. My unit 8 position: -1, -1. My unit 9 position: -1, -1. My unit 10 position: -1, -1. My unit 11 position: -1, -1. My unit 12 position: -1, -1. My unit 13 position: -1, -1. My unit 14 position: -1, -1. My unit 15 position: -1, -1. Enemy unit 0 position: -1, -1. Enemy unit 1 position: -1, -1. Enemy unit 2 position: -1, -1. Enemy unit 3 position: -1, -1. Enemy unit 4 position: -1, -1. Enemy unit 5 position: -1, -1. Enemy unit 6 position: -1, -1. Enemy unit 7

In [15]:
from datasets import load_dataset, Dataset

In [16]:
# Download the GSM8K dataset ('main' configuration); the progress output below
# shows it provides a train and a test split.
data = load_dataset('openai/gsm8k', 'main')

README.md:   0%|          | 0.00/7.94k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

In [18]:
train_data = data['train']
train_data

Dataset({
    features: ['question', 'answer'],
    num_rows: 7473
})

In [27]:
train_data.num_rows

7473

In [None]:
game_rules = """Environment
In the Lux AI Challenge Season 3, two teams compete against each other on a 2D map in a best of 5 match sequence (called a game) with each match lasting 100 time steps. Both teams have a pool of units they can control to gain points around the map while also trying to prevent the other team from doing the same.

Unique to Season 3 is how various game mechanics and parameters are randomized at the start of each game and remain the same between matches in one game. Some mechanics/paramters include the map terrain/generation, how much units can see on the map, how might they be blocked by map features, etc. Each match is played with fog of war, where each team can only see what their own units can see, with everything else being hidden. Given that some mechanics are randomized between games, the specs will clearly document how they are randomized and what the possible values are. There is also a summary table of every game parameter that is randomized between games in the Game Parameters section below.

A core objective of this game is a balanced strategy of exploration and exploitation. It is recommended to explore more in the first match or two before leveraging gained knowledge about the map and opponent behavior to win the latter matches.

Map
The map is a randomly generated 2D grid of size 24x24. There are several core features that make up the map: Empty Tiles, Asteroid Tiles, Nebula Tiles, Energy Nodes, and Relic Nodes. Notably, in a game, the map is never regenerated completely between matches. Whatever is the state of the map at the end of one match is what is used for the next match.

Empty Tiles
These are empty tiles in space without anything special about them. Units and nodes can be placed/move onto these tiles.

Asteroid Tiles
Asteroid tiles are impassable tiles that block anything from moving/spawning onto them. These tiles might move around over time during the map in a symmetric fashion. Sometimes asteroid tiles might move on top of existing units. In the game the unit is not removed as a result of this and can still take actions and move around provided there is an non asteroid tile adjacent to it.

Nebula Tiles
Nebula tiles are passable tiles with a number of features. These tiles might move around over time during the map in a symmetric fashion.

Vision Reduction: Nebula tiles can reduce/block vision of units. Because of vision reduction it is even possible for a unit to be unable to see itself while still being able to move! See Vision section below for more details on how team vision is determined. All nebula tiles have the same vision reduction value called params.nebula_tile_vision_reduction which is randomized from 0 to 3.

Energy Reduction: Nebula tiles can reduce the energy of units that end their turn on them. All nebula tiles have the same energy reduction value called params.nebula_tile_energy_reduction.

Energy Nodes
Energy nodes are mysterious objects that emit energy fields which can be harvested by units. These nodes might move around over time during the map in a symmetric fashion. In code, what actually occurs in each game is energy nodes are randomly generated on the map symmetrically and a random function is generated for each node. Each energy node's function is a function of distance. The energy value of a tile on a map is determined to be the sum of the energy node functions applied to the distance between tile and each node.

Relic Nodes
Relic nodes are objects in space that enable ships to go near it to gain team points. These relic nodes however are ancient and thus fragmented. As a result, only certain tiles near the relic nodes when a friendly ship is on it will gain points. The tiles that yield points are always hidden and can only be discovered by trial and error by moving around the relic nodes. Relic node positions themselves can be observed if withins sensor range. The tiles around relic nodes can overlap with tiles of other relic nodes but will not yield extra points if that occurs and is treated as one tile.

In code, a random 5x5 configuration / mask centered on the relic node is generated indicating which tiles yield points and which don't. Multiple ships can stack on one tile but will only yield at most one point per tile. Note that ship stacking can be risky due to the sapping action (See Sap Actions section below).

Units
Units in the game are ships that can move one tile in 5 directions (center, up, right, down, left) and perform a ranged energy sapping action. Units can overlap with other friendly units if they move onto the same tile. Units have a energy property which determines whether they can perform actions and start with 100 energy and can have a max of 400 energy. Energy is recharged via the energy field of the map. They always spawn on one of the two corners of the map depending on which team they are on.

Note that nebula tiles and energy fields can modify the energy of a unit when it is on that tile. However they can never reduce the energy of a unit below 0, only opposing units can do that which will then remove the unit from the game to be respawned at a later timestep. Unit IDs range from 0 to params.max_units - 1 for each team, and are recycled when units are spawned in if a previous one was removed.

Move Actions
All move actions except moving center cost params.unit_move_cost energy to perform. Moving center is always free (a zero action). Attempting to move off the edge of the map results in no movement occuring but energy is still consumed. Units cannot move onto tiles with an impassible feature like an asteroid tile.

Sap Actions
The sap action lets a unit target a specific tile on the map within a range called params.unit_sap_range and reduces the energy of each opposition unit on the target tile by params.unit_sap_cost while also costing unit_sap_cost energy to use. Moreover, any opposition units on the 8 adjacent tiles to the target tile are also sapped and their energy is reduced by params.unit_sap_cost * params.unit_sap_dropoff_factor.

Sap actions are submitted to the game engine / environment as a delta x and delta y value relative to the unit's current position. The delta x and delta y value magnitudes must both be <= params.unit_sap_range, so the sap range is a square around the unit.

Generally sap actions are risky since a single miss means your ships lose energy while the opponent does not. The area of effect can mitigate this risk somewhat depending on game parameters. Sap actions can however prove very valuable when opposition ships are heavily stacked and get hit as sapping the stacked tile hits every ship on the tile.

Vision
A team's vision is the combined vision of all units on that team. Team vision is essentially a boolean mask / matrix over the 2D map indicating whether that tile's information is visible to the team. In this game, you can think of each unit having an "eye in the sky" sattelite that is capturing information about the units surroundings, but this sattelite has reduced accuracy the farther away the tile is from the unit.

To determine which map tiles are visible to a team, we compute a vision power value for each tile on the map. For each unit on a team, we check each tile within the unit's sensor range and add 1 + params.unit_sensor_range - min(dx, dy) to the vision power map at tile (x+dx, y+dy) where (x,y) is the unit's position and (dx, dy) is the offset from the unit's position and abs(dx) <= params.unit_sensor_range and abs(dy) <= params.unit_sensor_range.

Nebula tiles have a vision reduction value of params.nebula_tile_vision_reduction. This number is reduced from every tile's vision power if that tile is a nebula tile.

For example, naturally without any nebula tiles the vision power values look like below and create a square of visibility around the unit.



When a unit is near a nebula tile, it can't see details about some nebula tiles, but it can see tiles beyond nebula tiles. Here the unit has a sensor range of 2 and the nebula tile vision reduction value is 2. It can see itself since the vision power centered at the unit is 3, but it can't see other nebula tiles since they are too far or the nebula tile vision reduction reduces the vision power to 0 or less.



When a unit is inside a nebula tile, if the nebula vision reduction is powerful enough (here the nebula vision reduction is 3, unit sensor range is 2), the unit cannot even see itself or any other nebula tiles.



Unit vision can overlap and increase the vision power linearly, which can help handle the situations like above when you cannot see anything. Below the nebula vision reduction is 3 and the unit sensor range is 2, and now some of the nebula tiles are visible thanks to the overlapping vision of two units.



Collisions / Energy Void Fields
In close quarters, units can impact each other in two ways, via direct collisions or by being adjacent to each other and sapping energy via their energy void fields.

In the event of two or more units from opposing teams occupy the same tile at the end of a turn, the team with the highest aggregate energy among its units on that tile survive, while the units of the opposing teams are removed from the game. If it is a tie, all units are removed from the game.

Furthermore, each unit generates an "energy void" field around itself that affects all cardinally (up, right, down left) adjacent opposition units. To determine how exactly each unit is affected by these energy void fields, we compute a 2D map for each team indicating the energy void strength at each tile. A unit contributes to tiles adjacent to itself a energy void strength equal to the total amount of energy the unit has at the start of the turn multiplied by params.unit_energy_void_factor rounded down. After a energy void map is computed for each team, a unit's energy is reduced by the energy void strength of the tile it is on divided by the total number of units on that tile. Note that units removed due to collisions do not contribute to the energy void field.

The energy void fields generally encourage stacking units to better spread out energy sapped by energy void fields of opposition units.

Win Conditions
To win the game, the team must have won the most matches out of the 5 match sequence.

To win a match, the team must have gained more relic points than the other team at the end of the match. If the relic points scores are tied, then the match winner is decided by who has more total unit energy. If that is also tied then the winner is chosen at random.

Match Resolution Order
At each time step of a match, we run the following steps in order:

Move all units that have enough energy to move
Execute the sap actions of all units that have enough energy to do so
Resolve collisions and apply energy void fields
Update the energy of all units based on their position (energy fields and nebula tiles)
Spawn units for all teams. Remove units that have less than 0 energy.
Determine the team vision / sensor masks for all teams and mask out observations accordingly
Environment objects like asteroids/nebula tiles/energy nodes move around in space
Compute new team points
Note that each match runs for params.max_steps_in_match steps and you take that many actions that affect the game. However, you will actually receive params.max_steps_in_match + 1 frames of observations since the very first frame will either be empty or the previous match's final observation (actions on these observations will not do anything).

Game Parameters
The full set of game parameters can be found here in the codebase.

Randomized Game Parameters / Map Generation
There are a number of randomized game paramteres which can modify and even disable/enable certain game mechanics. None of these game parameters are changed between matches in a game. The majority of these parameters are also not given to the teams themselves and must be discovered through exploration.

env_params_ranges = dict(
    map_type=[1],
    unit_move_cost=list(range(1, 6)), # list(range(x, y)) = [x, x+1, x+2, ... , y-1]
    unit_sensor_range=list(range(2, 5)),
    nebula_tile_vision_reduction=list(range(0,4)),
    nebula_tile_energy_reduction=[0, 0, 10, 25],
    unit_sap_cost=list(range(30, 51)),
    unit_sap_range=list(range(3, 8)),
    unit_sap_dropoff_factor=[0.25, 0.5, 1],
    unit_energy_void_factor=[0.0625, 0.125, 0.25, 0.375],
    # map randomizations
    nebula_tile_drift_speed=[-0.05, -0.025, 0.025, 0.05],
    energy_node_drift_speed=[0.01, 0.02, 0.03, 0.04, 0.05],
    energy_node_drift_magnitude=list(range(3, 6))
)
These parameter ranges (and other parameters) are subject to change in the beta phase of this competition as we gather feedback and data."""

In [None]:
# Full prompt text: the static rules string, one space, then agent1.llm_input.
game_rules_and_input = " ".join((game_rules, agent1.llm_input))

In [None]:
# Echo the combined prompt string as the cell output for a visual check.
game_rules_and_input

'Environment\nIn the Lux AI Challenge Season 3, two teams compete against each other on a 2D map in a best of 5 match sequence (called a game) with each match lasting 100 time steps. Both teams have a pool of units they can control to gain points around the map while also trying to prevent the other team from doing the same.\n\nUnique to Season 3 is how various game mechanics and parameters are randomized at the start of each game and remain the same between matches in one game. Some mechanics/paramters include the map terrain/generation, how much units can see on the map, how might they be blocked by map features, etc. Each match is played with fog of war, where each team can only see what their own units can see, with everything else being hidden. Given that some mechanics are randomized between games, the specs will clearly document how they are randomized and what the possible values are. There is also a summary table of every game parameter that is randomized between games in the 

In [32]:
# Tokenize the combined rules + observation prompt, then move the tensors to the model's device.
encoded_prompt = tokenizer(game_rules_and_input, return_tensors="pt")
inputs = encoded_prompt.to(model.device)

# Budget for the continuation: as many new tokens as the prompt itself holds.
prompt_token_count = inputs['input_ids'].shape[1]

# do_sample, top_k, top_p and use_cache are deliberately not passed here.
# NOTE(review): temperature only has an effect when sampling is active; since
# do_sample is not set in this call, confirm the model's generation config enables it.
generation_options = {
    "max_new_tokens": prompt_token_count,
    "pad_token_id": tokenizer.pad_token_id,
    "temperature": 0.6,
}
generated_output = model.generate(**inputs, **generation_options)

# Decode the first returned sequence and print it.
first_sequence = generated_output[0]
response = tokenizer.decode(first_sequence, skip_special_tokens=True)
print(response)

Environment
In the Lux AI Challenge Season 3, two teams compete against each other on a 2D map in a best of 5 match sequence (called a game) with each match lasting 100 time steps. Both teams have a pool of units they can control to gain points around the map while also trying to prevent the other team from doing the same.

Unique to Season 3 is how various game mechanics and parameters are randomized at the start of each game and remain the same between matches in one game. Some mechanics/paramters include the map terrain/generation, how much units can see on the map, how might they be blocked by map features, etc. Each match is played with fog of war, where each team can only see what their own units can see, with everything else being hidden. Given that some mechanics are randomized between games, the specs will clearly document how they are randomized and what the possible values are. There is also a summary table of every game parameter that is randomized between games in the Game

In [None]:
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Flip to True to load the weights with the 4-bit config below.
# False keeps the plain bfloat16 load (no quantization_config is passed at all).
USE_4BIT_QUANTIZATION = False

# Tokenizer that belongs to the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# generate() is given tokenizer.pad_token_id, so a pad token must exist.
# Fall back to EOS only when the tokenizer ships without one, so that a
# dedicated pad token is never overwritten.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 settings with nested (double) quantization and bfloat16 compute.
# Always constructed so the name stays defined; only applied when
# USE_4BIT_QUANTIZATION is True.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# NOTE(review): trust_remote_code=True allows the hub repository to execute its
# own Python while loading; presumably not required for this checkpoint —
# confirm and drop it if so.
model_load_kwargs = {
    "trust_remote_code": True,
    "device_map": "auto",  # let Accelerate decide device placement
    "torch_dtype": torch.bfloat16,
}
if USE_4BIT_QUANTIZATION:
    model_load_kwargs["quantization_config"] = bnb_config

model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

In [None]:
# Spot-check the loaded weight dtype by looking at the first named parameter only.
first_named_param = next(iter(model.named_parameters()), None)
if first_named_param is not None:
    name, param = first_named_param
    print(f"Layer: {name}, Data Type: {param.dtype}")

In [None]:
# Display the device reported by the loaded model.
model.device

In [None]:
# Prompt templates for the <reasoning>/<answer> response layout.

# System prompt: begins with a newline, then spells out the tag layout the
# model is asked to follow.
SYSTEM_PROMPT = (
    "\n"
    "Respond in the following format:\n"
    "<reasoning>\n"
    "...\n"
    "</reasoning>\n"
    "<answer>\n"
    "...\n"
    "</answer>\n"
)

# str.format template with {reasoning} and {answer} placeholders, using the
# same tag layout as SYSTEM_PROMPT.
XML_COT_FORMAT = (
    "<reasoning>\n"
    "{reasoning}\n"
    "</reasoning>\n"
    "<answer>\n"
    "{answer}\n"
    "</answer>\n"
)

In [None]:
max_new_tokens = 1024

# Minimal smoke-test prompt, tokenized and moved to the model's device.
prompt = "hello"
encoded_prompt = tokenizer(prompt, return_tensors="pt")
inputs = encoded_prompt.to(model.device)

# Sampling configuration: temperature 0.7 with top-k 50 / top-p 0.9 filtering,
# KV cache on, capped at max_new_tokens new tokens.
sampling_options = {
    "max_new_tokens": max_new_tokens,
    "pad_token_id": tokenizer.pad_token_id,
    "do_sample": True,
    "temperature": 0.7,
    "top_k": 50,
    "top_p": 0.9,
    "use_cache": True,
}
generated_output = model.generate(**inputs, **sampling_options)

# Decode the first returned sequence and print it.
first_sequence = generated_output[0]
response = tokenizer.decode(first_sequence, skip_special_tokens=True)
print(response)

In [None]:
max_new_tokens = 1024

# Prompt asking the model to interpret two observation grids: a sensor mask that
# is False everywhere and an energy map that is -1 everywhere.
# The grids are rendered with numpy instead of being pasted in as a multi-kilobyte
# literal. Line width and threshold are pinned so every grid row is emitted on a
# single line and nothing is summarized, independent of the global numpy
# linewidth/threshold print options.
GRID_SIZE = 24  # rows and columns in each grid
grid_render_options = {
    "max_line_width": 10_000,            # far wider than any rendered row
    "threshold": GRID_SIZE * GRID_SIZE,  # summarization only starts above this size
}
sensor_mask_text = np.array2string(
    np.zeros((GRID_SIZE, GRID_SIZE), dtype=bool), **grid_render_options
)
map_energy_text = np.array2string(
    np.full((GRID_SIZE, GRID_SIZE), -1), **grid_render_options
)
prompt = (
    "can you tell what these mean? "
    f"sensor_mask: '{sensor_mask_text}', "
    f"map_features_energy: '{map_energy_text}'"
)
# Tokenize the prompt defined above and move the tensors to the model's device.
encoded_prompt = tokenizer(prompt, return_tensors="pt")
inputs = encoded_prompt.to(model.device)

# Sampling configuration: temperature 0.7 with top-k 50 / top-p 0.9 filtering,
# KV cache on, capped at max_new_tokens new tokens.
sampling_options = {
    "max_new_tokens": max_new_tokens,
    "pad_token_id": tokenizer.pad_token_id,
    "do_sample": True,
    "temperature": 0.7,
    "top_k": 50,
    "top_p": 0.9,
    "use_cache": True,
}
generated_output = model.generate(**inputs, **sampling_options)

# Decode the first returned sequence and print it.
first_sequence = generated_output[0]
response = tokenizer.decode(first_sequence, skip_special_tokens=True)
print(response)

In [None]:
max_new_tokens = 1024

# NOTE(review): throwaway scratch prompt; consider swapping in a task-relevant
# prompt before this notebook is shared.
prompt = "what the fuck is going on bitch ass"

# Tokenize the prompt, then move the tensors to the model's device.
encoded_prompt = tokenizer(prompt, return_tensors="pt")
inputs = encoded_prompt.to(model.device)

In [None]:
# Debug: show the class of the object returned by the tokenizer call.
type(inputs)

In [None]:
# Debug: list the attributes and methods available on the tokenizer output.
dir(inputs)

In [None]:
# Debug: show the values stored in the tokenizer output.
inputs.values()

In [None]:
# Display the model object's repr as the cell output.
model

In [None]:
# Sampling configuration: temperature 0.7 with top-k 50 / top-p 0.9 filtering,
# KV cache on, capped at max_new_tokens new tokens.
sampling_options = {
    "max_new_tokens": max_new_tokens,
    "pad_token_id": tokenizer.pad_token_id,
    "do_sample": True,
    "temperature": 0.7,
    "top_k": 50,
    "top_p": 0.9,
    "use_cache": True,
}

# Generate from the tokenized `inputs` prepared in an earlier cell.
generated_output = model.generate(**inputs, **sampling_options)

# Decode the first returned sequence and print it.
first_sequence = generated_output[0]
response = tokenizer.decode(first_sequence, skip_special_tokens=True)
print(response)