<a href="https://colab.research.google.com/github/oluwafemidiakhoa/AIreasearcher/blob/main/ToT_game.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import numpy as np
from openai import OpenAI
import json
import os

# Read the key from the environment instead of hard-coding it, so that a
# secret never ends up committed together with the notebook. Set
# OPENAI_API_KEY before running; the empty-string fallback mirrors the
# previous placeholder value.
api_key = os.environ.get("OPENAI_API_KEY", "")
client = OpenAI(api_key=api_key)

BOARD_SIZE = 8  # side length of the square board
NUM_MINES = 10  # mines placed on every generated board
NUM_SIMULATIONS = 10  # games played by evaluate_agents()
# --- Game Setup ---
def generate_board(size=None, num_mines=None):
    """Create a random Minesweeper board.

    Args:
        size: Side length of the square board. Defaults to ``BOARD_SIZE``.
        num_mines: Number of mines to place. Defaults to ``NUM_MINES``.

    Returns:
        A ``(board, mines)`` tuple. ``board`` is a ``size x size`` integer
        NumPy array in which ``-1`` marks a mine and every other cell holds
        the number of mines among its neighbouring cells. ``mines`` is the
        set of ``(row, col)`` coordinates of the mines.

    Raises:
        ValueError: If ``num_mines`` is negative or larger than the number
            of cells on the board.
    """
    size = BOARD_SIZE if size is None else size
    num_mines = NUM_MINES if num_mines is None else num_mines

    total_cells = size * size
    if not 0 <= num_mines <= total_cells:
        # The previous rejection-sampling loop would spin forever here.
        raise ValueError(
            f"num_mines must be between 0 and {total_cells}, got {num_mines}"
        )

    # Sample distinct flat indices, then map each back to (row, col).
    mines = {
        divmod(index, size)
        for index in random.sample(range(total_cells), num_mines)
    }

    board = np.zeros((size, size), dtype=int)
    for r, c in mines:
        board[r][c] = -1  # -1 represents a mine

    # Fill in adjacent mine counts for every non-mine cell.
    offsets = (-1, 0, 1)
    for r in range(size):
        for c in range(size):
            if board[r][c] == -1:
                continue
            board[r][c] = sum(
                1
                for dr in offsets
                for dc in offsets
                if 0 <= r + dr < size
                and 0 <= c + dc < size
                and board[r + dr][c + dc] == -1
            )
    return board, mines

def get_neighbors(r, c, size=None):
    """Return the in-bounds cells adjacent to ``(r, c)``.

    Args:
        r: Row index of the cell.
        c: Column index of the cell.
        size: Side length of the square board. Defaults to ``BOARD_SIZE``.

    Returns:
        A list of ``(row, col)`` tuples for the up-to-eight surrounding
        cells, in row-major order, excluding ``(r, c)`` itself.
    """
    size = BOARD_SIZE if size is None else size
    offsets = (-1, 0, 1)
    return [
        (r + dr, c + dc)
        for dr in offsets
        for dc in offsets
        if (dr, dc) != (0, 0) and 0 <= r + dr < size and 0 <= c + dc < size
    ]

# --- Baseline Agent ---
def baseline_agent(board, revealed, flagged_mines=None, known_safe=None):
    """Pick a uniformly random hidden cell that is not flagged as a mine.

    The two optional parameters give this agent the same four-argument
    call shape that ``play_game`` uses for every agent, while keeping the
    existing two-argument calls working.

    Args:
        board: The game board (unused; the choice is purely random).
        revealed: 2-D grid of booleans, truthy where a cell is revealed.
        flagged_mines: Optional iterable of ``[row, col]`` cells already
            flagged as mines; these are never chosen.
        known_safe: Optional list of cells known to be safe (unused).

    Returns:
        A ``(row, col)`` tuple.

    Raises:
        IndexError: If no hidden, unflagged cell is left (raised by
            ``random.choice`` on an empty list).
    """
    flagged = {tuple(cell) for cell in flagged_mines or ()}
    candidates = [
        (r, c)
        for r, row in enumerate(revealed)
        for c, is_revealed in enumerate(row)
        if not is_revealed and (r, c) not in flagged
    ]
    return random.choice(candidates)

In [None]:
def board_to_text(board, revealed):
    """Render the board as text for the LLM prompt.

    Each row becomes one line of space-separated cells: a revealed cell
    shows its board value, a hidden cell shows ``?``.
    """
    def render_row(r):
        # One symbol per column of row r.
        symbols = (
            str(board[r][c]) if revealed[r][c] else "?"
            for c in range(BOARD_SIZE)
        )
        return " ".join(symbols)

    return "\n".join(render_row(r) for r in range(BOARD_SIZE))

In [None]:

def _parse_thoughts(content):
    """Extract the JSON array of thoughts from a raw model reply.

    Models frequently wrap JSON in a Markdown code fence (```json ... ```)
    or surround it with prose, which makes a direct ``json.loads`` fail.

    Raises:
        ValueError: If no JSON array can be located or decoded
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    fenced = content.split("```")
    if len(fenced) >= 3:
        # Text between the first pair of fences; a leading "json" language
        # tag is skipped by the bracket search below.
        content = fenced[1]

    start = content.find("[")
    end = content.rfind("]")
    if start == -1 or end < start:
        raise ValueError("no JSON array found in model reply")
    return json.loads(content[start:end + 1])


def llm_generate_thoughts(board, revealed, flagged_mines, known_safe, k=3):
    """Ask the LLM for ``k`` candidate moves with safety scores.

    Args:
        board: The game board; only revealed cells are shown to the model.
        revealed: 2-D grid of booleans, truthy where a cell is revealed.
        flagged_mines: Cells (``[row, col]``) already flagged as mines.
        known_safe: Cells (``[row, col]``) known to be safe.
        k: Number of candidate cells to request.

    Returns:
        The decoded JSON list returned by the model (expected to contain
        dicts with ``"cell"``, ``"reason"`` and ``"score"`` keys), or an
        empty list if the request fails or the reply cannot be parsed.
    """
    board_text = board_to_text(board, revealed)

    flagged = {tuple(cell) for cell in flagged_mines}
    valid_moves = [
        [r, c]
        for r in range(BOARD_SIZE)
        for c in range(BOARD_SIZE)
        if not revealed[r][c] and (r, c) not in flagged
    ]

    prompt = f"""
You are playing a {BOARD_SIZE}x{BOARD_SIZE} Minesweeper game.

- A number (0–8) shows how many adjacent mines a revealed cell has.
- A '?' means the cell is hidden.
- You have flagged these mines: {flagged_mines}
- You know these cells are safe: {known_safe}
- Your job is to choose ONE hidden cell that is least likely to contain a mine.
- Use the following logic:
  - If a cell shows '1' and touches exactly one '?', that cell must be a mine.
  - If a cell shows '1' and touches one already flagged mine, other neighbors are safe.
  - Cells next to '0's are generally safe.

You have the following board:
{board_text}

Here are all valid hidden cells you can choose from:
{valid_moves}

Step-by-step:
1. List {k} possible cells to click next.
2. For each, explain why it might be safe (based on adjacent numbers and known info).
3. Rate each move from 0.0 to 1.0 as a safety score (1 = definitely safe).

Return your answer in this exact JSON format:
[
  {{ "cell": [row, col], "reason": "...", "score": 0.95 }},
  ...
]
Return only the JSON array, with no Markdown code fences or extra text.
"""

    # API boundary: any failure here degrades to "no thoughts" so that the
    # caller can fall back to another strategy.
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
        )
        content = (response.choices[0].message.content or "").strip()
    except Exception as e:
        print("[Error in LLM Generation]", e)
        return []

    print("\n[THOUGHTS GENERATED]\n", content)
    try:
        return _parse_thoughts(content)
    except ValueError as e:
        print("[Error in LLM Generation]", e)
        return []

def _playable_move(thought, revealed, flagged):
    """Validate one LLM thought.

    Returns ``((row, col), score)`` when the thought is well-formed and
    points at a hidden, unflagged, in-bounds cell; otherwise ``None``.
    """
    try:
        row, col = thought["cell"]
        score = thought["score"]
    except (KeyError, TypeError, ValueError):
        return None
    if not (isinstance(row, int) and isinstance(col, int)):
        return None
    if not isinstance(score, (int, float)):
        return None
    if not (0 <= row < len(revealed) and 0 <= col < len(revealed[row])):
        return None
    if revealed[row][col] or (row, col) in flagged:
        return None
    return (row, col), score


def tot_llm_agent(board, revealed, flagged_mines, known_safe, min_score=0.9):
    """Choose a move from LLM-generated thoughts, else fall back to random.

    Args:
        board: The game board.
        revealed: 2-D grid of booleans, truthy where a cell is revealed.
        flagged_mines: Cells (``[row, col]``) already flagged as mines.
        known_safe: Cells (``[row, col]``) known to be safe.
        min_score: Minimum safety score a thought needs to be played.

    Returns:
        A ``(row, col)`` tuple: the highest-scoring valid thought if its
        score is at least ``min_score``, otherwise the baseline agent's
        random pick.
    """
    thoughts = llm_generate_thoughts(board, revealed, flagged_mines, known_safe, k=5)

    if not thoughts:
        print("[ToT] Falling back to baseline agent due to no thoughts.")
        return baseline_agent(board, revealed)

    flagged = {tuple(cell) for cell in flagged_mines or ()}

    # Keep the first best-scoring playable thought. Malformed entries and
    # cells that are already revealed or flagged are skipped, because the
    # game loop would reject them and simply ask again.
    best = None
    for thought in thoughts:
        candidate = _playable_move(thought, revealed, flagged)
        if candidate is not None and (best is None or candidate[1] > best[1]):
            best = candidate

    if best is not None and best[1] >= min_score:
        move, score = best
        print(f"[ToT] Confidently choosing {move} with score {score}")
        return move

    print("[ToT] No high-confidence move found, using baseline.")
    return baseline_agent(board, revealed)


In [None]:
MAX_ATTEMPTS = 20  # rejected moves tolerated per game before aborting


def play_game(agent_fn):
    """Play one game on a fresh board using ``agent_fn`` to pick moves.

    Rules as implemented here: hitting a mine does not end the game; the
    mine is flagged and play continues. Revealing a ``0`` cell also reveals
    its immediate neighbours (no recursive flood fill). The game is won
    once every non-mine cell is revealed.

    Args:
        agent_fn: Callable ``(board, revealed, flagged_mines, known_safe)``
            returning a ``(row, col)`` move.

    Returns:
        ``True`` if all safe cells were revealed, ``False`` if the agent
        made ``MAX_ATTEMPTS`` rejected moves first.
    """
    board, _ = generate_board()
    revealed = [[False for _ in range(BOARD_SIZE)] for _ in range(BOARD_SIZE)]
    flagged_mines = []
    known_safe = []
    safe_cells = BOARD_SIZE * BOARD_SIZE - NUM_MINES
    revealed_count = 0
    attempts = 0

    while revealed_count < safe_cells and attempts < MAX_ATTEMPTS:
        r, c = agent_fn(board, revealed, flagged_mines, known_safe)

        # Every rejected move counts as an attempt. Otherwise an agent that
        # keeps returning the same revealed or flagged cell would never
        # let this loop terminate.
        if not (0 <= r < BOARD_SIZE and 0 <= c < BOARD_SIZE):
            attempts += 1
            print(f"Invalid move: {(r, c)}")
            continue
        if [r, c] in flagged_mines:
            attempts += 1
            print(f"Move is a flagged mine: {(r, c)}")
            continue
        if revealed[r][c]:
            attempts += 1
            print(f"Cell already revealed: {(r, c)}")
            continue

        if board[r][c] == -1:
            print(f"Hit a mine at {(r, c)}! Flagging and continuing...")
            # Don't reveal, just flag and try again. This happens at most
            # once per mine, because flagged cells are rejected above.
            flagged_mines.append([r, c])
            continue

        # Reveal safe cell
        revealed[r][c] = True
        known_safe.append([r, c])
        revealed_count += 1

        if board[r][c] == 0:
            # A zero has no adjacent mines, so all neighbours are safe.
            for nr, nc in get_neighbors(r, c):
                if not revealed[nr][nc]:
                    revealed[nr][nc] = True
                    known_safe.append([nr, nc])
                    revealed_count += 1

    if attempts >= MAX_ATTEMPTS:
        print("Too many attempts, aborting.")
        return False
    print("Game won!")
    return True

def evaluate_agents():
    """Play ``NUM_SIMULATIONS`` games with the ToT LLM agent and report wins.

    Returns:
        The number of games won.
    """
    # Take the count from the constant so the banner cannot drift from the
    # number of games actually played.
    print(f"Running {NUM_SIMULATIONS} simulations...\n")
    tot_wins = 0
    for _ in range(NUM_SIMULATIONS):
        print("New Game ToT!")
        tot_wins += play_game(tot_llm_agent)  # True counts as 1
    print(f"ToT LLM Agent Win Rate  : {tot_wins} / {NUM_SIMULATIONS}")
    return tot_wins


if __name__ == "__main__":
    evaluate_agents()

Running 10 simulations...

New Game ToT!

[THOUGHTS GENERATED]
 ```json
[
  { "cell": [0, 0], "reason": "The entire board is hidden, so no specific information is available. Choosing a corner might be slightly safer due to fewer adjacent cells.", "score": 0.5 },
  { "cell": [0, 7], "reason": "The entire board is hidden, so no specific information is available. Choosing a corner might be slightly safer due to fewer adjacent cells.", "score": 0.5 },
  { "cell": [7, 0], "reason": "The entire board is hidden, so no specific information is available. Choosing a corner might be slightly safer due to fewer adjacent cells.", "score": 0.5 },
  { "cell": [7, 7], "reason": "The entire board is hidden, so no specific information is available. Choosing a corner might be slightly safer due to fewer adjacent cells.", "score": 0.5 },
  { "cell": [3, 3], "reason": "The entire board is hidden, so no specific information is available. Choosing a central cell might provide more information about surroundi