# Connect-4 data generator

In [1]:
import numpy as np
import random
from tqdm import tqdm
import time
import pandas as pd
import os
from pathlib import Path
import openpyxl
import xlsxwriter

# Reaching this line means every import in this cell resolved without raising ImportError.
print("All dependencies imported successfully.")

All dependencies imported successfully.


In [2]:
def _build_plays(top_depths=(6, 7), games=2):
    """Assemble the match-up table consumed by the generation loop.

    For every depth in ``top_depths`` the table gets, in this order:
      * one pair of entries per shallower depth (top depth as A, then the
        same pairing with the seats swapped), and
      * a final mirror match where both sides use the top depth.

    Each entry maps a label such as ``"L6L1"`` (or ``"L6"`` for a mirror
    match) to ``{"A": depth_of_A, "B": depth_of_B, "games": games}``.
    Insertion order matters because the generation loop iterates the dict.
    """
    table = {}
    for top in top_depths:
        for low in range(1, top):
            table[f"L{top}L{low}"] = {"A": top, "B": low, "games": games}
            table[f"L{low}L{top}"] = {"A": low, "B": top, "games": games}
        table[f"L{top}"] = {"A": top, "B": top, "games": games}
    return table


# Lookahead depth for player A / player B and number of games, keyed by label.
PLAYS = _build_plays()

In [3]:
# Workbook that accumulates generated games across runs of this notebook.
DATA_FILE = "C4.data.xlsx"

# Resume from the existing workbook when there is one; otherwise start from an
# empty frame so the save cell can tell there is nothing to merge with.
DATA = pd.read_excel(DATA_FILE) if os.path.exists(DATA_FILE) else pd.DataFrame()

In [4]:
from C4.connect4_env import Connect4Env
from C4.connect4_lookahead import Connect4Lookahead

In [5]:
# Wall-clock start of the whole run; the final cell subtracts this from
# time.time() to report the total duration.
begin_start_time = time.time()
# Single lookahead agent shared by every game: play_game() reads this
# module-level name for both players' moves.
Lookahead = Connect4Lookahead()
# No shared environment is created here; play_game() builds its own
# Connect4Env for each game.
#ENV = Connect4Env()

In [6]:
def play_game(lookA: int, lookB: int, label: str) -> dict:
    """
    Play a single game between two lookahead players and record the result.

    Player A moves first as +1 and searches to depth ``lookA``; player B
    answers as -1 with depth ``lookB``. Moves come from the module-level
    ``Lookahead`` object via ``n_step_lookahead``, and each game runs on its
    own fresh ``Connect4Env``.

    Returns a flat dict with:
      * ``"label"``  - the ``label`` argument, unchanged;
      * ``"winner"`` - ``env.winner``, or 0 when ``env.winner`` is None
        (presumably a draw - confirm against Connect4Env);
      * ``"<row>-<col>"`` - the final board value, as int, for every cell in
        ``env.ROWS`` x ``env.COLS``, in row-major order.
    """
    env = Connect4Env()
    board = env.reset()

    # Search depth for whichever side is about to move.
    depth_for = {1: lookA, -1: lookB}
    mover = 1  # A always opens as +1

    while True:
        column = Lookahead.n_step_lookahead(board, mover, depth=depth_for[mover])
        board, _reward, finished = env.step(column)
        if finished:
            break
        mover = -mover  # hand the turn to the other side

    winner = 0 if env.winner is None else env.winner

    # Metadata first, then one "row-col" column per board cell.
    record = {"label": label, "winner": winner}
    record.update(
        {
            f"{r}-{c}": int(env.board[r, c])
            for r in range(env.ROWS)
            for c in range(env.COLS)
        }
    )
    return record


In [7]:
from tqdm import tqdm
import pandas as pd

# One flat result dict per finished game, across all configured match-ups.
new_rows = []

for label, cfg in PLAYS.items():
    print(f"Generating games for {label} (A={cfg['A']} vs B={cfg['B']}, {cfg['games']} games)")
    # The progress bar only counts games; the counter itself is not used.
    game_counter = tqdm(range(cfg["games"]), desc=f"{label}", leave=True)
    new_rows.extend(play_game(cfg["A"], cfg["B"], label) for _game in game_counter)

# Build the frame once from the collected records.
df_new = pd.DataFrame(new_rows)

Generating games for L6L1 (A=6 vs B=1, 2 games)


L6L1: 100%|██████████| 2/2 [21:16<00:00, 638.04s/it]


Generating games for L1L6 (A=1 vs B=6, 2 games)


L1L6: 100%|██████████| 2/2 [18:21<00:00, 550.69s/it]


Generating games for L6L2 (A=6 vs B=2, 2 games)


L6L2: 100%|██████████| 2/2 [17:46<00:00, 533.23s/it]


Generating games for L2L6 (A=2 vs B=6, 2 games)


L2L6: 100%|██████████| 2/2 [25:30<00:00, 765.12s/it]


Generating games for L6L3 (A=6 vs B=3, 2 games)


L6L3: 100%|██████████| 2/2 [26:53<00:00, 806.96s/it]


Generating games for L3L6 (A=3 vs B=6, 2 games)


L3L6: 100%|██████████| 2/2 [25:55<00:00, 777.92s/it]


Generating games for L6L4 (A=6 vs B=4, 2 games)


L6L4: 100%|██████████| 2/2 [27:28<00:00, 824.29s/it]


Generating games for L4L6 (A=4 vs B=6, 2 games)


L4L6: 100%|██████████| 2/2 [23:06<00:00, 693.12s/it]


Generating games for L6L5 (A=6 vs B=5, 2 games)


L6L5: 100%|██████████| 2/2 [32:26<00:00, 973.15s/it]


Generating games for L5L6 (A=5 vs B=6, 2 games)


L5L6: 100%|██████████| 2/2 [38:32<00:00, 1156.25s/it]


Generating games for L6 (A=6 vs B=6, 2 games)


L6: 100%|██████████| 2/2 [42:50<00:00, 1285.45s/it]


Generating games for L7L1 (A=7 vs B=1, 2 games)


L7L1: 100%|██████████| 2/2 [1:30:17<00:00, 2708.98s/it]


Generating games for L1L7 (A=1 vs B=7, 2 games)


L1L7: 100%|██████████| 2/2 [1:27:16<00:00, 2618.48s/it]


Generating games for L7L2 (A=7 vs B=2, 2 games)


L7L2: 100%|██████████| 2/2 [1:02:07<00:00, 1863.56s/it]


Generating games for L2L7 (A=2 vs B=7, 2 games)


L2L7: 100%|██████████| 2/2 [1:29:53<00:00, 2696.69s/it]


Generating games for L7L3 (A=7 vs B=3, 2 games)


L7L3: 100%|██████████| 2/2 [51:36<00:00, 1548.47s/it]


Generating games for L3L7 (A=3 vs B=7, 2 games)


L3L7: 100%|██████████| 2/2 [1:13:30<00:00, 2205.39s/it]


Generating games for L7L4 (A=7 vs B=4, 2 games)


L7L4: 100%|██████████| 2/2 [1:38:59<00:00, 2969.93s/it]


Generating games for L4L7 (A=4 vs B=7, 2 games)


L4L7: 100%|██████████| 2/2 [1:15:54<00:00, 2277.35s/it]


Generating games for L7L5 (A=7 vs B=5, 2 games)


L7L5: 100%|██████████| 2/2 [2:52:06<00:00, 5163.05s/it]  


Generating games for L5L7 (A=5 vs B=7, 2 games)


L5L7: 100%|██████████| 2/2 [1:29:30<00:00, 2685.46s/it]


Generating games for L7L6 (A=7 vs B=6, 2 games)


L7L6: 100%|██████████| 2/2 [2:25:58<00:00, 4379.23s/it]  


Generating games for L6L7 (A=6 vs B=7, 2 games)


L6L7: 100%|██████████| 2/2 [1:57:39<00:00, 3529.95s/it]  


Generating games for L7 (A=7 vs B=7, 2 games)


L7: 100%|██████████| 2/2 [2:35:00<00:00, 4650.32s/it]


In [8]:
# Merge this run's games with whatever was loaded from disk; when nothing was
# loaded, the new frame is taken as-is.
combined = df_new if DATA.empty else pd.concat([DATA, df_new], ignore_index=True)

# Keep a single copy of rows that match on every column, then renumber.
DATA = combined.drop_duplicates().reset_index(drop=True)

# Write the merged table back to the workbook it was loaded from.
DATA.to_excel(DATA_FILE, index=False)
print(f"Saved {len(DATA)} unique rows total to {DATA_FILE}")


Saved 70 unique rows total to C4.data.xlsx


In [9]:
# Wall-clock time since begin_start_time was captured, reported in hours.
total_end_time = time.time()
elapsed_seconds = total_end_time - begin_start_time
total_elapsed = elapsed_seconds / 3600  # seconds -> hours
print(f"Data generated in {total_elapsed:.1f} hours")

Data generated in 26.8 hours
