# Battleship

In [3]:
import autogen
from autogen.trace import trace_op, node
from autogen.trace.trace_ops import TraceExecutionError
from autogen.trace.optimizers import FunctionOptimizer
from autogen.trace.nodes import GRAPH

## Scenario 0: Produce valid code to generate (x, y)
Challenges:
1. The generated code must run end-to-end without raising an exception.
2. The LLM is not told the data type of the input.
3. The LLM is not told the width/height of the board.

In [2]:
from battleship import BattleshipBoard

In [5]:
# Scenario 0 starting point for the function being optimized.
# It is registered with trainable=True and its `.parameter` is what gets
# handed to FunctionOptimizer in the driver cell below. The initial body just
# returns its input unchanged, so it is not expected to be a valid coordinate.
# NOTE(review): the run log prints this function's source as the parameter
# `__code:0`, so the body and docstring are presumably part of what the
# optimizer reads and rewrites -- keep explanatory comments out here, above
# the decorator, rather than inside the function.
# NOTE(review): the parameter name `map` shadows the Python builtin.
@trace_op("[select_coordinate] Given a map, select a valid coordinate.", trainable=True)
def select_coordinate(map):
    """
    Given a map, select a valid coordinate.
    """
    return map

def user_fb_for_coords_validity(board, coords):
    """Try a shot at ``coords`` and report whether the board accepted it.

    ``coords`` is indexed at positions 0 and 1 and the two values are passed
    to ``board.check_shot``.

    Returns a ``(feedback, reward)`` pair:
    ``("Valid coordinates.", 1)`` when the call raises nothing, otherwise
    ``(str(exception), 0)``.
    """
    # Any failure (bad type, out-of-range index, rejection by the board) is
    # deliberately converted into textual feedback instead of propagating.
    try:
        board.check_shot(coords[0], coords[1])
    except Exception as err:
        outcome = (str(err), 0)
    else:
        outcome = ("Valid coordinates.", 1)
    return outcome

In [4]:
# Scenario 0 driver: keep asking the optimizer to rewrite select_coordinate
# until the value it returns is accepted by the board as a valid shot.
GRAPH.clear()

board = BattleshipBoard(10, 10)

# The board contents are the (non-trainable) input; only the code of
# select_coordinate is handed to the optimizer.
x = node(board.board, trainable=False)
optimizer = FunctionOptimizer([select_coordinate.parameter], config_list=autogen.config_list_from_json("OAI_CONFIG_LIST"))

feedback = ""
reward = 0

while reward != 1:
    try:
        output = select_coordinate(x)
        feedback, reward = user_fb_for_coords_validity(board, output.data)
    except TraceExecutionError as e:
        # The traced function itself failed: use the exception node as the
        # output and its message as the feedback for the optimizer.
        output = e.exception_node
        feedback = output.data

    optimizer.zero_feedback()
    optimizer.backward(output, feedback)

    print(f"output={output.data}, feedback={feedback}, variables=\n")  # logging
    for p in optimizer.parameters:
        print(p.name, p.data)

    # Only update the code while it is still failing. Stepping after a
    # successful validation costs one more LLM call and may replace the code
    # that was just shown to work with an unvalidated rewrite.
    if reward != 1:
        optimizer.step(verbose=False)

output=x, feedback=string index out of range, variables=

__code:0 def select_coordinate(map):
    """
    Given a map, select a valid coordinate.
    """
    return 'x'
output=x, feedback=string index out of range, variables=

__code:0 def select_coordinate(map):
    """
    Given a map, select a valid coordinate.
    """
    return 'x'
output=(0, 0), feedback=Valid coordinates., variables=

__code:0 def select_coordinate(map):
    for i in range(len(map)):
        for j in range(len(map[i])):
            if map[i][j] is not None:
                return (i, j)
    return None


## Scenario 1: Learning to place shots well (single board).
Important: to avoid bias, we can't tell the LLM that this is a Battleship game.

Challenges:
1. It must learn not to place a shot on a spot that has already been shot.
2. It must learn to place shots within the board.
3. It must develop basic heuristics (battleships are placed either vertically or horizontally).

In [6]:
from battleship import BattleshipBoard

# Scenario 1 starting point for the function being optimized; this rebinds the
# name select_coordinate used in Scenario 0.
# It is registered with trainable=True and its `.parameter` is what gets
# handed to FunctionOptimizer in the Scenario 1 driver cell. The initial body
# ignores its input and always returns [0, 0].
# NOTE(review): the run log prints this function's source as the parameter
# `__code:0`, so the body and docstring are presumably part of what the
# optimizer reads and rewrites -- keep explanatory comments out here, above
# the decorator, rather than inside the function.
# NOTE(review): the parameter name `map` shadows the Python builtin.
@trace_op("[select_coordinate] Given a map, select a valid coordinate to see if we can earn reward.", trainable=True)
def select_coordinate(map):
    """
    Given a map, select a valid coordinate. We might earn reward from this coordinate.
    """
    return [0, 0]

def user_fb_for_placing_shot(board, coords):
    """Fire one shot at ``coords`` and package the result as an environment step.

    ``coords`` is indexed at positions 0 and 1 and the two values are passed
    to ``board.check_shot``.

    Returns a 4-tuple ``(obs, reward, terminal, feedback)``:
    on success, ``board.get_shots()``, the value returned by ``check_shot``,
    ``board.check_terminate()`` and the text ``"Got <reward> reward."``;
    if anything raises, ``board.get_shots()``, ``0``, ``False`` and the
    exception message.
    """
    # This is already a multi-step, cumulative-reward problem, hence the
    # (obs, reward, terminal, feedback) layout of the result.
    try:
        hit = board.check_shot(coords[0], coords[1])
        step = (
            board.get_shots(),
            hit,
            board.check_terminate(),
            f"Got {hit} reward.",
        )
    except Exception as err:
        # A failed shot yields no reward and never ends the episode.
        step = (board.get_shots(), 0, False, str(err))
    return step

In [3]:
# Clear GRAPH (presumably the global trace graph -- TODO confirm) before
# starting a new scenario.
GRAPH.clear()

# 5x5 board built with num_each_type=1 and ship types 'C' and 'B' excluded.
# This instance is only used for the preview in the next cell; the driver
# cell further down constructs its own board.
board = BattleshipBoard(5, 5, num_each_type=1, exclude_ships=['C', 'B'])

In [4]:
# Show the board; the captured output below is a grid of ship letters and '.'.
board.visualize_board()

R R R . .
. . . . .
. . . . D
S S S . D
. . . . .



In [7]:
# Scenario 1 driver: fire up to max_calls shots at a fresh board, asking the
# optimizer to update select_coordinate after every shot.
GRAPH.clear()

board = BattleshipBoard(5, 5, num_each_type=1, exclude_ships=['C', 'B'])
print("Ground State Board")
board.visualize_board()

# The current shot map is wrapped as a non-trainable node; only the code of
# select_coordinate is handed to the optimizer.
obs = node(board.get_shots(), trainable=False)
optimizer = FunctionOptimizer([select_coordinate.parameter], config_list=autogen.config_list_from_json("OAI_CONFIG_LIST"))

feedback = ""
terminal = False
cum_reward = 0  # running sum of per-shot rewards; not printed within this cell
max_calls = 10  # hard cap on loop iterations (each one ends with an optimizer step)

while not terminal and max_calls > 0:
    # This is also online optimization:
    # we have the opportunity to keep changing the function with each round of interaction
    # NOTE(review): only TraceExecutionError is handled below. The recorded run
    # of this cell ended with an uncaught TypeError, apparently after the
    # optimizer rewrote select_coordinate to require extra positional arguments.
    try:
        output = select_coordinate(obs)
        # NOTE(review): obs is rebound here to the raw value returned by
        # board.get_shots(), not re-wrapped with node() as it was before the
        # loop -- confirm this is intended.
        obs, reward, terminal, feedback = user_fb_for_placing_shot(board, output.data)
    except TraceExecutionError as e:
        # this is essentially a retry: the error becomes the feedback, no
        # reward is earned and the episode continues
        output = e.exception_node
        feedback = output.data
        terminal = False
        reward = 0

    print("Obs:")
    board.visualize_shots()

    cum_reward += reward
        
    # Propagate the textual feedback to the trainable code, log the current
    # parameters, then let the optimizer propose the next version.
    optimizer.zero_feedback()
    optimizer.backward(output, feedback)

    print(f"output={output.data}, feedback={feedback}, variables=\n")  # logging
    for p in optimizer.parameters:
        print(p.name, p.data)
    optimizer.step(verbose=False)
    max_calls -= 1

Ground State Board
. . . . .
. S S S .
D D . . R
. . . . R
. . . . R

Obs:
O . . . .
. . . . .
. . . . .
. . . . .
. . . . .

output=[0, 0], feedback=Got False reward., variables=

__code:0 def select_coordinate(map):
    """
    Given a map, select a valid coordinate. We might earn reward from this coordinate.
    """
    return [0, 0]
Obs:
O . . . .
. X . . .
. . . . .
. . . . .
. . . . .

output=[1, 1], feedback=Got True reward., variables=

__code:0 def select_coordinate(map):
    return [1, 1]
Obs:
O . . . .
. X . . .
. . . . .
. . . . .
. . . . .

output=[1, 1], feedback=Got False reward., variables=

__code:0 def select_coordinate(map):
    return [1, 1]
Obs:
O . . . .
. X . . .
. . O . .
. . . . .
. . . . .

output=[2, 2], feedback=Got False reward., variables=

__code:0 def select_coordinate(map):
    return [2, 2]
Obs:
O . . . .
. X . . .
. . O . .
. . . . .
. . . . .

output=name 'reward_x' is not defined, feedback=name 'reward_x' is not defined, variables=

__code:0 def sel

TypeError: select_coordinate() missing 2 required positional arguments: 'reward_x' and 'reward_y'