# Goal: find a prompt that does not directly list the valid moves, yet from which the model reliably infers them

### Create a .env file containing ANTHROPIC_API_KEY=sk_... (your Anthropic API key)
### On Google Colab, upload the .env file through the Files panel

In [1]:
!pip install -q python-dotenv
import os
from dotenv import load_dotenv
load_dotenv()

# print(os.environ['ANTHROPIC_API_KEY'])
!pip install -q anthropic
import anthropic

import time

In [2]:
import random
import re
import ast
import copy

class NoughtsAndCrosses:
    """Tic-tac-toe game loop that alternates between two move-providing agents.

    The board is a list of rows of strings. A free cell holds its 1-based
    position number ("1".."9", row-major); an occupied cell holds "X" or "O".
    ``agent1`` always plays "X" and ``agent2`` always plays "O". Each agent
    must expose ``make_move(board, player)`` returning the 1-based position
    to play, or ``0`` to signal that it makes no move (game already over).

    Args:
        agent1: Agent playing "X".
        agent2: Agent playing "O".
        first_player: "X" or "O"; defaults to "X" when ``None``.
        board: Optional pre-filled board. It is used (and mutated) as-is.
    """

    def __init__(self, agent1, agent2, first_player=None, board=None):
        self.agent1 = agent1
        self.agent2 = agent2
        self.n = 3
        self.first_player = first_player
        self.board = board
        self.reset()

    def print_field(self):
        """Print the board, one row per line, each followed by a dashed rule."""
        for row in self.board:
            print("|".join(row))
            # 2n - 1 characters: n cells plus n - 1 separators ("-----" for n == 3).
            print("-" * (2 * self.n - 1))

    def reset(self):
        """Restore the side to move, creating an empty board if none exists.

        An existing board is deliberately kept untouched, so the final
        position stays inspectable on ``self.board`` after ``start_game``.
        """
        if self.board is None:
            self.board = [[str(i + j * self.n + 1) for i in range(self.n)] for j in range(self.n)]
        self.current_player = "X" if self.first_player is None else self.first_player

    def is_winner(self, player):
        """Return True if ``player`` fills a complete row, column or diagonal."""
        size = self.n
        for i in range(size):
            if all(cell == player for cell in self.board[i]):
                return True
            if all(self.board[j][i] == player for j in range(size)):
                return True
        main_diagonal = all(self.board[i][i] == player for i in range(size))
        anti_diagonal = all(self.board[i][size - 1 - i] == player for i in range(size))
        return main_diagonal or anti_diagonal

    def is_board_full(self):
        """Return True if every cell holds "X" or "O"."""
        return all(cell in ('X', 'O') for row in self.board for cell in row)

    def make_move(self, row, col):
        """Place the current player's mark at 0-based (row, col).

        Returns:
            True if the mark was placed; False if the cell is occupied or the
            coordinates lie outside the board.
        """
        # Reject out-of-range indices explicitly: a negative index would
        # otherwise wrap around and silently target a cell on the far side.
        if not (0 <= row < self.n and 0 <= col < self.n):
            return False
        if self.board[row][col] not in ('X', 'O'):
            self.board[row][col] = self.current_player
            return True
        return False

    def switch_player(self):
        """Hand the turn to the other player."""
        self.current_player = "O" if self.current_player == "X" else "X"

    def _is_valid_position(self, position):
        """Return True if ``position`` is an integer in 1..n*n."""
        return (
            isinstance(position, int)
            and not isinstance(position, bool)
            and 1 <= position <= self.n * self.n
        )

    def _finished_result(self):
        """Return the outcome of an already-finished board, or None if still open.

        A winner takes precedence over a full board, matching the order used
        after a regular move in ``start_game``.
        """
        other_player = 'X' if self.current_player == 'O' else 'O'
        if self.is_winner(self.current_player):
            return self.current_player
        if self.is_winner(other_player):
            return other_player
        if self.is_board_full():
            return 'D'
        return None

    def start_game(self):
        """Play until the game ends and return ``(result, steps)``.

        Returns:
            A tuple ``(result, steps)`` where ``result`` is "X", "O" or "D"
            (draw) and ``steps`` counts the moves after which play continued
            (the finishing move is not counted).

        Raises:
            ValueError: If an agent returns an illegal position while the game
                is still open, or a non-zero illegal position on a finished
                board.
        """
        steps = 0
        result = None
        while True:
            self.print_field()
            agent = self.agent1 if self.current_player == "X" else self.agent2
            position = agent.make_move(self.board, self.current_player)

            placed = False
            if self._is_valid_position(position):
                # Convert the 1-based, row-major position into 0-based indices.
                row, col = divmod(position - 1, self.n)
                placed = self.make_move(row, col)

            if placed:
                if self.is_winner(self.current_player):
                    self.print_field()
                    print(f"Player {self.current_player} wins!")
                    result = self.current_player
                    break
                if self.is_board_full():
                    self.print_field()
                    print("It's a draw!")
                    result = 'D'
                    break
                self.switch_player()
                steps += 1
                continue

            # No mark was placed. That is only acceptable when the game was
            # already decided and the agent signalled "no move" with 0.
            result = self._finished_result()
            if result is None:
                raise ValueError("invalid move: %s" % position)
            if position != 0:
                raise ValueError("position was invalid: %s" % position)
            break
        self.reset()
        return result, steps

    @staticmethod
    def random_fill_board(num_entries):
        """Return a 3x3 board with ``num_entries`` random cells set to "X" or "O".

        Cells and marks are chosen independently at random, so the result need
        not be a position reachable in a real game.

        Raises:
            ValueError: If ``num_entries`` exceeds the number of cells.
        """
        n = 3
        total_cells = n * n
        if num_entries > total_cells:
            raise ValueError(
                "num_entries must be at most %d, got %s" % (total_cells, num_entries)
            )
        board = [[str(r * n + c + 1) for c in range(n)] for r in range(n)]
        # Sampling distinct cells up front avoids retrying on occupied cells.
        for index in random.sample(range(total_cells), max(num_entries, 0)):
            row, col = divmod(index, n)
            board[row][col] = random.choice(['X', 'O'])
        return board


class RandomAgent:
    """Baseline agent that plays a uniformly random free cell."""

    n = 3  # side length of the square board

    def make_move(self, board, player):
        """Return the 1-based position of a random free cell, or 0 if none is left.

        Args:
            board: List of rows; occupied cells hold 'X' or 'O', free cells
                hold any other value.
            player: Mark of the player to move (unused; the choice is random).

        Returns:
            An int in 1..n*n (row-major, 1-based), or 0 when the board has no
            free cell. This is the integer form the game loop converts back
            into a row and column.
        """
        free_positions = [
            row * self.n + col + 1
            for row in range(self.n)
            for col in range(self.n)
            if board[row][col] not in ('X', 'O')
        ]
        return random.choice(free_positions) if free_positions else 0


class LLMAgent:
    """Agent that asks an Anthropic chat model for its next tic-tac-toe move.

    The model is prompted with the current board and asked to answer using
    <thinking>, <hypothetical>, <is_mistake> and <move> XML tags; the content
    of the first <move> tag is taken as the chosen 1-based position.
    """

    n = 3  # side length of the square board

    def __init__(self):
        # Alternative, cheaper model: "claude-3-haiku-20240307"
        self.model = "claude-3-opus-20240229"
        self.max_tokens = 3500
        self.client = anthropic.Anthropic()

    def get_response(self,
                     prompt,
                     model_kwargs=None):
        """Send ``prompt`` as a single user message and return the reply text.

        Args:
            prompt: User message content.
            model_kwargs: Optional extra keyword arguments forwarded to
                ``client.messages.create``.

        Returns:
            The text of the first content block of the response.
        """
        system = 'You are an expert tic-tac-toe player who is very focused and thinks exhaustively before making any move.  You play strategically.  You make no mistakes.\n'
        print(prompt)
        response = self.client.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            system=system,
            temperature=0,
            messages=[
                {"role": "user", "content": prompt}
            ],
            # A fresh dict per call; a `{}` default would be shared between calls.
            **(model_kwargs or {}),
        )
        return response.content[0].text

    @staticmethod
    def _find_tag(text, tag):
        """Return the contents of every <tag>...</tag> pair in ``text``."""
        # re.DOTALL lets '.' match newlines so multi-line tag bodies are found.
        return re.findall(rf'<{tag}>(.*?)</{tag}>', text, re.DOTALL)

    def make_move(self, board, player):
        """Ask the model for ``player``'s move on ``board``.

        Args:
            board: List of rows of strings; free cells hold their position
                number, occupied cells hold 'X' or 'O'.
            player: 'X' or 'O', the side the model plays.

        Returns:
            The integer found in the first <move> tag, or 0 when the reply has
            no <move> tag or the tag is empty (the prompt instructs the model
            to omit the tag when the board is filled).

        Raises:
            ValueError: If the <move> tag content is not an integer.
        """
        other_player = 'X' if player == 'O' else 'O'
        board_str = "\n".join([" ".join(row) for row in board])
        prompt = (
            f"The current board as a 2d grid is:\n<current_board>\n{board_str}\n</current_board>\n."
            f"\nValid moves (according to the above 2d grid) are only those with numeric values."
            f"\nIt's player {player}'s turn. You are player {player}."
            f"\nIn your response, ensure you follow these steps:"
            f"\n1) provide your thoughts in <thinking> </thinking> xml tags, within which you try out several valid unfilled moves for {player}."
            f"\n   Remember, valid moves are only those on current board that are numeric values only (not X or O)."
            f"\n2) Consider which move is best and valid (unfilled) for player {player} given all prior information."
            f"\n3) Propose a hypothetical valid (unfilled) move for {player}, placing that numerical value inside <hypothetical> </hypothetical> xml tags."
            f"\n4) Evaluate step-by-step whether the hypothetic move is a mistake inside <is_mistake> </is_mistake> xml tags."
            f"\n5) Finally, in order to play optimally or at least block player {other_player}, decide on a final position (unfilled value that is still numeric in the current board) for {player}."
            f"\nRemember: The current board as a 2d grid is:\n<current_board>\n{board_str}\n</current_board>\n."
            f"\nRemember: Valid moves are only those on current board that are numeric values only (not X or O) and so are unfilled."
            f"\nPut your final position for the unfilled numeric value inside <move> </move> xml tags.  If the board is filled, do not use the <move> xml tags at all."
        )

        position_str = self.get_response(prompt)

        thoughts = self._find_tag(position_str, 'thinking')
        hypothetical = self._find_tag(position_str, 'hypothetical')
        is_mistake = self._find_tag(position_str, 'is_mistake')

        # Only the first <move> tag counts; surrounding whitespace/newlines
        # are not part of the answer. Missing or empty means "no move" (0).
        moves = self._find_tag(position_str, 'move')
        move = moves[0].strip() if moves else ''
        if not move:
            move = '0'

        print('Thoughts:\n', '\n'.join(thoughts))
        print('hypothetical:\n', '\n'.join(hypothetical))
        print('is_mistake:\n', '\n'.join(is_mistake))
        print('move:\n', move)

        # Parse strictly as an integer: the game loop does integer arithmetic
        # on the position, and model output should never be evaluated as code.
        try:
            return int(move)
        except ValueError as err:
            raise ValueError(f"could not parse move from LLM response: {move!r}") from err


class HumanAgent:
    """Agent whose moves are typed in interactively on standard input."""

    n = 3  # The size of the game board

    def make_move(self, board, player):
        """Prompt until the user enters a free cell and return its position.

        Args:
            board: List of rows; occupied cells hold 'X' or 'O'.
            player: Mark of the player to move (not used for prompting).

        Returns:
            The chosen 1-based, row-major position (1..n*n) as an int.
        """
        cell_count = self.n * self.n
        while True:
            try:
                choice = int(input(f"Enter your move (1-{cell_count}): "))
            except ValueError:
                print("Please enter a valid integer.")
                continue

            if not 1 <= choice <= cell_count:
                print(f"Invalid input. Please enter a number between 1 and {cell_count}.")
                continue

            # Translate the 1-based position into 0-based board coordinates.
            r, c = divmod(choice - 1, self.n)
            if board[r][c] in ('X', 'O'):
                print("Position already taken. Please choose another move.")
                continue

            # The position itself (not a (row, col) pair) is what gets returned.
            return choice


In [3]:
# Two independent LLM-backed agents; they are passed to the game as agent1 and agent2.
agent1, agent2 = LLMAgent(), LLMAgent()

In [4]:
# Play 100 games from randomly pre-filled boards and record each outcome.
results = []
for i in range(100):
    # The number of pre-filled cells cycles through 0..9 across games.
    board = NoughtsAndCrosses.random_fill_board(i % 10)
    # Snapshot the starting position: the game mutates `board` in place.
    start_board = copy.deepcopy(board)
    first_player = random.choice(['O', 'X'])
    game = NoughtsAndCrosses(agent1, agent2, first_player=first_player, board=board)

    print("-----------------------")
    print(f"GAME {i} {first_player}")
    game.print_field()
    print("-----------------------")

    result, steps = game.start_game()
    results.append({
        'first_player': first_player,
        'steps': steps,
        'winner': result,
        'start_board': start_board,
        'board': copy.deepcopy(game.board),
    })

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
X 8 X
This does not result in a win for X.

hypothetical:
 3
is_mistake:
 
Playing in position 3 is not a mistake. It results in X winning the game. None of the other potential moves lead to a guaranteed win for X this turn.

move:
 3
1|X|X
-----
O|O|X
-----
X|8|9
-----
The current board as a 2d grid is:
<current_board>
1 X X
O O X
X 8 9
</current_board>
.
Valid moves (according to the above 2d grid) are only those with numeric values.
It's player O's turn. You are player O.
In your response, ensure you follow these steps:
1) provide your thoughts in <thinking> </thinking> xml tags, within which you try out several valid unfilled moves for O.
   Remember, valid moves are only those on current board that are numeric values only (not X or O).
2) Consider which move is best and valid (unfilled) for player O given all prior information.
3) Propose a hypothetical valid (unfilled) move for O, placing that numerical value inside

In [5]:
import pprint

# Show how many games were recorded, then the full per-game records.
print(len(results))
print(pprint.pformat(results))

100
[{'board': [['O', 'X', 'X'], ['O', 'O', 'O'], ['X', '8', '9']],
  'first_player': 'O',
  'start_board': [['1', '2', '3'], ['4', '5', '6'], ['7', '8', '9']],
  'steps': 6,
  'winner': 'O'},
 {'board': [['O', 'O', 'O'], ['4', 'O', 'X'], ['X', 'O', 'X']],
  'first_player': 'O',
  'start_board': [['O', '2', '3'], ['4', '5', '6'], ['7', '8', '9']],
  'steps': 6,
  'winner': 'O'},
 {'board': [['X', '2', '3'], ['X', 'X', '6'], ['O', 'O', 'O']],
  'first_player': 'X',
  'start_board': [['X', '2', '3'], ['4', '5', '6'], ['7', '8', 'O']],
  'steps': 3,
  'winner': 'O'},
 {'board': [['1', 'X', '3'], ['O', 'X', '6'], ['O', 'O', 'O']],
  'first_player': 'O',
  'start_board': [['1', 'X', '3'], ['O', '5', '6'], ['7', '8', 'O']],
  'steps': 2,
  'winner': 'O'},
 {'board': [['1', '2', 'O'], ['4', 'X', 'O'], ['7', 'X', 'O']],
  'first_player': 'O',
  'start_board': [['1', '2', 'O'], ['4', 'X', '6'], ['7', 'X', 'O']],
  'steps': 0,
  'winner': 'O'},
 {'board': [['X', 'O', 'O'], ['4', 'O', 'O'], ['O',