<a href="https://colab.research.google.com/github/kuberiitb/artificial_intelligence/blob/main/AI101/03_tic_tac_toe_judge_prompt_engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install required pckages

In [1]:
!pip install langchain-groq langchain --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/137.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/495.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m495.8/495.8 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25h

## Import required pckages

In [2]:
import os
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate

## Load the GROK KEY
- This step is necessary for the safety of your key
- Upload your .env file in colab with the values in this format:
- ```GROQ_API_KEY=<KEY YOU COPIED>```
- Then load the key with below command using dotenv package.


In [3]:
from dotenv import load_dotenv
load_dotenv("/content/.env")

True

### Above command output should say True, otherwise your keys are not loaded.

### Setting GROQ_API_KEY to environment so that langchain can access it

In [4]:
if not os.environ["GROQ_API_KEY"]:
  os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

### Now let's learn how to set a prompt template and chain.

In [5]:
# Initialize the Groq LLM
# llm = ChatGroq(model_name="llama-3.1-8b-instant", temperature=0.7) #smaller/cheaper model
llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.7) #bigger/costlier model

# Define a prompt template
prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a Tic-tac-toe game judge.\n
    Check the game below given in text format.\n
    print the game input.
    Check(and print) if a character(x or o) is repeated 3 times in a row or a column.
    Check(and print) if a character is repeated 3 times in a diagonal?
    Then decide who wins, and tell why(row x or column x or diagonal).
    No further explaination.
    Give output in below format:
    Winner: x or o
    Reason: Row-1 or Col-2 or Diagonal-1 or Diagonal-2

    Example:
    Game:
    o o x
    o o x
    o x o

    Output:
    Winner: o
    Reason: Diagonal-1

    """),
    ("user", "{input}")
])

# Create a chain
chain = prompt | llm

In [6]:
test_input_1 = "o o x\nx o x\no x x"

# Invoke the chain
response = chain.invoke({"input": test_input_1})
print(response.content)

Game:
o o x
x o x
o x x

No character repeated 3 times in a row.
No character repeated 3 times in a column.
No character repeated 3 times in a diagonal.

Winner: None
Reason: No winner


In [7]:
test_input_2 = "o o x\no o x\nx x o"

# Invoke the chain
response = chain.invoke({"input": test_input_2})
print(response.content)

Game:
o o x
o o x
x x o

Checking rows for repeat: No row has a character repeated 3 times.
Checking columns for repeat: Column 1 has 'o' repeated 3 times.
Checking diagonals for repeat: No diagonal has a character repeated 3 times.

Winner: o
Reason: Col-1


In [8]:
improved_system_prompt = """"
You are a Tic-Tac-Toe game judge.

Rules you MUST follow strictly:
1. The board is always a 3×3 grid.
2. Rows are checked first (Row-1, Row-2, Row-3).
3. Then columns (Col-1, Col-2, Col-3).
4. Then diagonals:
   - Diagonal-1 = top-left to bottom-right
   - Diagonal-2 = top-right to bottom-left
5. A player wins ONLY if the SAME symbol (x or o) appears exactly 3 times
   in ONE row, ONE column, or ONE diagonal.
6. Do NOT guess. If no such line exists, say Winner: None.

Step-by-step process (do NOT skip steps):
- Print the game board exactly as given.
- Explicitly check ALL rows, ALL columns, and BOTH diagonals.
- Decide the winner ONLY after all checks are complete.

Output rules:
- Print the Input Game
- Do NOT add explanations or extra text.
- Follow the output format EXACTLY.

Output format:
GAME:
<INPUT GAME>
Winner: <ONE OF x | o | None>
Reason: <ONE OF Row-1 | Row-2 | Row-3 | Col-1 | Col-2 | Col-3 | Diagonal-1 | Diagonal-2 | None >

Example:
Game:
o o x
o o x
o x o

Output:
Winner: o
Reason: Diagonal-1

Now judge the following game:

Game:
{GAME_BOARD}

"""

improved_prompt = ChatPromptTemplate.from_messages([
    ("system", improved_system_prompt),
    ("user", "{GAME_BOARD}")
])

# Create a chain
improved_chain = improved_prompt | llm

In [9]:
# Invoke the chain
response = improved_chain.invoke({"GAME_BOARD": test_input_1})
print(response.content)

GAME:
o o x
x o x
o x x
Winner: x
Reason: Row-3


### Even this is not working. we need stronger model or better input format to make this game work.

## Let's extract rows, columns and diagonals and give it to the LLM

In [10]:
def extract_rows_cols_diags(game, separator=" "):
  print("Game:")
  print(game)
  rows = [row.split(separator) for row in game.split("\n")]
  for idx, x in enumerate(rows):
    print(f"Row-{idx+1} {" ".join(x)}")

  cols = []
  for i in range(len(rows[0])):
    temp_col = [rows[x][i] for x in range(3)]
    cols.append(temp_col)
  for idx, x in enumerate(cols):
    print(f"Col-{idx+1} {" ".join(x)}")

  diags = []
  diags.append([rows[x][x] for x in range(3)])
  diags.append([rows[x][2-x] for x in range(3)])
  for idx, x in enumerate(diags):
    print(f"Diag-{idx+1} {" ".join(x)}")

extract_rows_cols_diags(test_input_2)

Game:
o o x
o o x
x x o
Row-1 o o x
Row-2 o o x
Row-3 x x o
Col-1 o o x
Col-2 o o x
Col-3 x x o
Diag-1 o o o
Diag-2 x o x


In [11]:
improved_system_prompt_with_detailed_input = """
You are a deterministic Tic-Tac-Toe judge.

IMPORTANT RULES (follow strictly):
1. Do NOT recompute rows, columns, or diagonals.
2. Use ONLY the provided Row, Col, and Diag lines.
3. A player wins ONLY if:
   - the SAME symbol (x or o)
   - appears EXACTLY 3 times
   - in ONE single Row, Col, or Diag line.
4. Check in this exact priority order:
   Row-1 → Row-2 → Row-3 →
   Col-1 → Col-2 → Col-3 →
   Diag-1 → Diag-2
5. If no line satisfies the win condition, the winner is None.
6. Do NOT guess. Do NOT infer. Do NOT explain.

Output format (EXACT, no extra text):
Winner: x | o | None
Reason: Row-1 | Row-2 | Row-3 | Col-1 | Col-2 | Col-3 | Diag-1 | Diag-2 | None

Example:
Input:
Row-1 o o x
Row-2 o o x
Row-3 x x o
Col-1 o o x
Col-2 o o x
Col-3 x x o
Diag-1 o o o
Diag-2 x o x

Output:
Winner: o
Reason: Diag-1

Now judge the following input exactly as per rules:

{INPUT}

"""

improved_prompt_with_detailed_input = ChatPromptTemplate.from_messages([
    ("system", improved_system_prompt_with_detailed_input),
    ("user", "{INPUT}")
])

# Create a chain
improved_chain_with_detailed_input = improved_prompt_with_detailed_input | llm

response = improved_chain_with_detailed_input.invoke({"INPUT": extract_rows_cols_diags(test_input_2)})
print(response.content)

Game:
o o x
o o x
x x o
Row-1 o o x
Row-2 o o x
Row-3 x x o
Col-1 o o x
Col-2 o o x
Col-3 x x o
Diag-1 o o o
Diag-2 x o x
Winner: None
Reason: None


## Even this is not working.

Before trying a new model, let us set a evaluation data.

In [12]:
evaluation_data = [

    # ---------- ROW WINS (x) ----------
    ["x x x\no o -\n- - -", "x", "Row-1"],
    ["o o -\nx x x\n- - -", "x", "Row-2"],
    ["o o -\n- - -\nx x x", "x", "Row-3"],
    ["x x x\no - -\n- o -", "x", "Row-1"],
    ["- o -\nx x x\no - -", "x", "Row-2"],
    ["o - -\n- o -\nx x x", "x", "Row-3"],
    ["x x x\nx o o\n- o -", "x", "Row-1"],
    ["o o x\nx x x\n- o o", "x", "Row-2"],
    ["o - -\nx o o\nx x x", "x", "Row-3"],

    # ---------- ROW WINS (o) ----------
    ["o o o\nx x -\n- - -", "o", "Row-1"],
    ["x x -\no o o\n- - -", "o", "Row-2"],
    ["x x -\n- - -\no o o", "o", "Row-3"],
    ["o o o\nx - -\n- x -", "o", "Row-1"],
    ["- x -\no o o\nx - -", "o", "Row-2"],
    ["x - -\n- x -\no o o", "o", "Row-3"],
    ["o o o\no x x\n- x -", "o", "Row-1"],
    ["x x o\no o o\n- x x", "o", "Row-2"],
    ["x - -\no x x\no o o", "o", "Row-3"],

    # ---------- COLUMN WINS (x) ----------
    ["x o -\nx o -\nx - -", "x", "Col-1"],
    ["x o o\nx - o\nx - -", "x", "Col-1"],
    ["x - o\nx o -\nx o -", "x", "Col-1"],
    ["o x -\no x -\n- x -", "x", "Col-2"],
    ["o x o\n- x o\n- x -", "x", "Col-2"],
    ["- x o\no x -\n- x o", "x", "Col-2"],
    ["o o x\n- - x\n- - x", "x", "Col-3"],
    ["o - x\no - x\n- - x", "x", "Col-3"],
    ["- - x\no o x\n- o x", "x", "Col-3"],

    # ---------- COLUMN WINS (o) ----------
    ["o x -\no x -\no - -", "o", "Col-1"],
    ["o x x\no - x\no - -", "o", "Col-1"],
    ["o - x\no x -\no x -", "o", "Col-1"],
    ["x o -\nx o -\n- o -", "o", "Col-2"],
    ["x o x\n- o x\n- o -", "o", "Col-2"],
    ["- o x\nx o -\n- o x", "o", "Col-2"],
    ["x x o\n- - o\n- - o", "o", "Col-3"],
    ["x - o\nx - o\n- - o", "o", "Col-3"],
    ["- - o\nx x o\n- x o", "o", "Col-3"],

    # ---------- DIAGONAL WINS (x) ----------
    ["x o -\no x -\n- x o", "x", "Diag-LR"],
    ["x - o\n- x o\n- - x", "x", "Diag-LR"],
    ["x o o\n- x -\no - x", "x", "Diag-LR"],
    ["- o x\no x -\nx - -", "x", "Diag-RL"],
    ["o - x\n- x o\nx - -", "x", "Diag-RL"],
    ["o o x\n- x -\nx o -", "x", "Diag-RL"],

    # ---------- DIAGONAL WINS (o) ----------
    ["o x -\nx o -\n- o x", "o", "Diag-LR"],
    ["o - x\n- o x\n- - o", "o", "Diag-LR"],
    ["o x x\n- o -\nx - o", "o", "Diag-LR"],
    ["- x o\nx o -\no - -", "o", "Diag-RL"],
    ["x - o\n- o x\no - -", "o", "Diag-RL"],
    ["x x o\n- o -\no x -", "o", "Diag-RL"],

    # ---------- DRAWS ----------
    ["x o x\no x o\no x o", "draw", "No-3-in-a-row"],
    ["o x o\nx o x\nx o x", "draw", "No-3-in-a-row"],
    ["x o o\no x x\nx o o", "draw", "No-3-in-a-row"],
    ["o x x\nx o o\no o x", "draw", "No-3-in-a-row"],
    ["x o x\nx o o\no x x", "draw", "No-3-in-a-row"],
    ["o x o\no x x\nx o o", "draw", "No-3-in-a-row"],

    # ---------- INVALID STATES ----------
    ["x x x\no o o\n- - -", "invalid", "Multiple-winners"],
    ["x x x\nx x x\no o o", "invalid", "Multiple-winners"],
    ["x x x\nx x -\n- - -", "invalid", "Too-many-moves"],
    ["o o o\no o -\n- - -", "invalid", "Too-many-moves"],
    ["x x x\nx o o\nx o o", "invalid", "Illegal-move-count"],
    ["o o o\nx x x\nx o o", "invalid", "Multiple-winners"],
]

print(len(evaluation_data))  # 120


60


In [13]:
evaluation_data[0][0]

'x x x\no o -\n- - -'

In [14]:
extract_rows_cols_diags(evaluation_data[0][0])

Game:
x x x
o o -
- - -
Row-1 x x x
Row-2 o o -
Row-3 - - -
Col-1 x o -
Col-2 x o -
Col-3 x - -
Diag-1 x o -
Diag-2 x o -


In [15]:
response = improved_chain_with_detailed_input.invoke({"INPUT": extract_rows_cols_diags(evaluation_data[0][0])})
print(response.content)

Game:
x x x
o o -
- - -
Row-1 x x x
Row-2 o o -
Row-3 - - -
Col-1 x o -
Col-2 x o -
Col-3 x - -
Diag-1 x o -
Diag-2 x o -
Winner: None
Reason: None


## Let's try GPT model instead of LLAMA

In [20]:
llm_gpt = ChatGroq(model_name="openai/gpt-oss-20b", temperature=0.7) #opensourced gpt model

# Create a chain
improved_chain_with_detailed_input_gpt = improved_prompt_with_detailed_input | llm_gpt

response = improved_chain_with_detailed_input_gpt.invoke({"INPUT": extract_rows_cols_diags(test_input_2)})
print(response.content)

Game:
o o x
o o x
x x o
Row-1 o o x
Row-2 o o x
Row-3 x x o
Col-1 o o x
Col-2 o o x
Col-3 x x o
Diag-1 o o o
Diag-2 x o x
Winner: None
Reason: None


In [21]:
improved_system_prompt = """"
You are a Tic-Tac-Toe game judge.

Rules you MUST follow strictly:
1. The board is always a 3×3 grid.
2. Rows are checked first (Row-1, Row-2, Row-3).
3. Then columns (Col-1, Col-2, Col-3).
4. Then diagonals:
   - Diagonal-1 = top-left to bottom-right
   - Diagonal-2 = top-right to bottom-left
5. A player wins ONLY if the SAME symbol (x or o) appears exactly 3 times
   in ONE row, ONE column, or ONE diagonal.
6. Do NOT guess. If no such line exists, say Winner: None.

Step-by-step process (do NOT skip steps):
- Print the game board exactly as given.
- Explicitly check ALL rows, ALL columns, and BOTH diagonals.
- Decide the winner ONLY after all checks are complete.

Output rules:
- Print the Input Game
- Do NOT add explanations or extra text.
- Follow the output format EXACTLY.

Output format:
GAME:
<INPUT GAME>
Winner: <ONE OF x | o | None>
Reason: <ONE OF Row-1 | Row-2 | Row-3 | Col-1 | Col-2 | Col-3 | Diagonal-1 | Diagonal-2 | None >

Example:
Game:
o o x
o o x
o x o

Output:
Winner: o
Reason: Diagonal-1

Now judge the following game:

Game:
{GAME_BOARD}

"""

improved_prompt = ChatPromptTemplate.from_messages([
    ("system", improved_system_prompt),
    ("user", "{GAME_BOARD}")
])

# Create a chain
improved_chain_gpt = improved_prompt | llm_gpt

response = improved_chain_gpt.invoke({"GAME_BOARD": test_input_2})
print(response.content)

GAME:
o o x
o o x
x x o
Winner: o
Reason: Diagonal-1


In [22]:
response = improved_chain_gpt.invoke({"GAME_BOARD": test_input_1})
print(response.content)

GAME:
o o x
x o x
o x x
Winner: x
Reason: Col-3


## Now we can run a loop over evaluation data and get accuracy of different prompt