In [3]:
#!/usr/bin/env python3
import json
import random
from collections import defaultdict

# ---- Run configuration ----

# Text file read by main(), one puzzle record per line.
INFILE = "rush.txt"

# JSON file written by main().
OUTFILE = "rush_no_wall_1000_balanced.json"

# Copied unchanged into the "exit" field of every puzzle written out.
EXIT = [3, 6]

# Seed for the module-level `random` generator (applied below).
SEED = 42

# Number of puzzles to select.
TARGET = 1000

# Inclusive bounds on the move count of puzzles that are kept.
MOVE_MIN = 2
MOVE_MAX = 45

# Seed once, up front, so every later random draw is reproducible.
random.seed(SEED)

In [4]:
def parse_line(line: str):
    """
    Parse one record of the form "<moves> <36-char-board> <cluster_size>".

    The first whitespace-separated field is the move count, the last field
    is ignored, and everything in between is concatenated into the board.

    Returns (moves:int, board_str:str), or None when the line has fewer than
    three fields, the first field is not an integer, or the board is not
    exactly 36 characters long.
    """
    fields = line.split()
    if len(fields) >= 3:
        first, *middle, _last = fields
        try:
            move_count = int(first)
        except ValueError:
            return None
        layout = "".join(middle)
        if len(layout) == 36:
            return move_count, layout
    return None

def to_cell(c: str):
    """
    Translate one board character into its matrix cell value.

    'A' becomes "R"; 'o', '.' and 'x' become None; any other character is
    returned unchanged.
    """
    if c == "A":
        return "R"
    is_blank = c == "o" or c == "." or c == "x"
    return None if is_blank else c

def board_to_matrix(board: str):
    """
    Convert a board string into a list of 6 rows of 6 cells each.

    Characters at indices 0..35 are read in row-major order and each one is
    mapped through to_cell().
    """
    grid = []
    for row_start in range(0, 36, 6):
        row = [to_cell(board[row_start + col]) for col in range(6)]
        grid.append(row)
    return grid

def allocate_equal_range(buckets, target, move_keys):
    """
    Pick about `target` items, spread as evenly as possible over `move_keys`.

    buckets:   dict[moves] -> list[(idx, moves, board)]. Items must be
               hashable. The dict and its lists are not modified.
    target:    total number of items wanted.
    move_keys: move counts to draw from (e.g., 2..45); the result is grouped
               in this order.

    Each key gets a quota of target // k, and the first target % k keys get
    one more. A key whose bucket is smaller than its quota contributes
    everything it has; the resulting shortfall is made up by drawing extra
    items round-robin from the buckets that still have unused items.

    All draws, including the redistribution draws, are random and use the
    module-level `random` generator.

    Returns a flat list of the selected items. It may be shorter than
    `target` when the buckets for `move_keys` hold fewer items in total.
    """
    k = len(move_keys)
    if k == 0:
        return []

    # Initial quota (e.g., 22 each, plus +1 to the first 'rem' buckets)
    base, rem = divmod(target, k)
    quota = {m: base + (1 if i < rem else 0) for i, m in enumerate(move_keys)}

    # First pass: sample up to quota or availability
    selected = {m: [] for m in move_keys}
    shortfall = 0
    for m in move_keys:
        pool = buckets.get(m, [])
        want = quota[m]
        take = min(want, len(pool))
        if take > 0:
            selected[m] = random.sample(pool, take)
        if len(pool) < want:
            shortfall += want - len(pool)

    if shortfall > 0:
        # Build the unused items per bucket. The lookup set is built once per
        # bucket (not once per item), and the leftovers are shuffled so that
        # the extra picks are random rather than always the last entries of
        # the input order.
        remaining_pool = {}
        for m in move_keys:
            already_taken = set(selected[m])
            leftovers = [x for x in buckets.get(m, []) if x not in already_taken]
            random.shuffle(leftovers)
            remaining_pool[m] = leftovers

        # Redistribute: one extra item per bucket per round, until the
        # shortfall is covered or every bucket is exhausted.
        added = 0
        while added < shortfall:
            progressed = False
            for m in move_keys:
                if remaining_pool[m]:
                    selected[m].append(remaining_pool[m].pop())
                    added += 1
                    progressed = True
                    if added >= shortfall:
                        break
            if not progressed:
                break  # nowhere else to draw from

    out = []
    for m in move_keys:
        out.extend(selected[m])
    # Defensive trim: the quotas sum to `target`, so this is not expected to
    # trigger.
    if len(out) > target:
        out = random.sample(out, target)
    return out

def main():
    """
    Build the balanced puzzle set and write it to OUTFILE as JSON.

    Steps:
      1. Read INFILE, keeping lines that parse_line() accepts, whose board
         contains no 'x', and whose move count is in [MOVE_MIN, MOVE_MAX].
      2. Bucket the kept puzzles by move count and print the availability.
      3. Select up to TARGET puzzles with allocate_equal_range().
      4. Write the selection to OUTFILE as a JSON list; each entry has
         "name" (1-based sequence number), "exit", "min_num_moves" and
         "board" (6x6 matrix from board_to_matrix()).
      5. Print how many puzzles were selected per move count.
    """
    # read & filter: wall-free AND moves in [MOVE_MIN, MOVE_MAX];
    # bucket by move count as we go
    by_moves = defaultdict(list)  # moves -> [(idx, moves, board), ...]
    kept = 0
    with open(INFILE, "r", encoding="utf-8", errors="ignore") as f:
        for idx, line in enumerate(f, start=1):
            parsed = parse_line(line)
            if not parsed:
                continue
            moves, board = parsed
            if "x" in board:
                continue
            if not (MOVE_MIN <= moves <= MOVE_MAX):
                continue
            by_moves[moves].append((idx, moves, board))
            kept += 1

    move_keys = list(range(MOVE_MIN, MOVE_MAX + 1))
    print(f"Wall-free puzzles with moves in [{MOVE_MIN},{MOVE_MAX}]: {kept}")
    print("Availability per move count:")
    for m in move_keys:
        print(f"  moves={m}: {len(by_moves[m])} available")

    # select TARGET puzzles, distributed as equally as possible across the
    # move range
    chosen = allocate_equal_range(by_moves, TARGET, move_keys)
    print(f"Selected {len(chosen)} puzzles (target={TARGET}).")

    if len(chosen) < TARGET:
        print("⚠️ Not enough wall-free puzzles to reach the full target with this distribution.")
        print(f"   The file will contain fewer than {TARGET} puzzles.")

    # build output JSON — sequential names starting at 1
    puzzles = []
    for new_id, (_, moves, board) in enumerate(chosen, start=1):
        puzzles.append({
            "name": new_id,
            "exit": EXIT,
            "min_num_moves": moves,
            "board": board_to_matrix(board)
        })

    # Single-line JSON with one space after each comma. json.dumps already
    # produces this; the separators are spelled out to make the format
    # explicit. (Post-processing the text with str.replace would double the
    # spaces and could alter string values.)
    json_str = json.dumps(puzzles, ensure_ascii=False, separators=(", ", ": "))

    with open(OUTFILE, "w", encoding="utf-8") as f:
        f.write(json_str)

    # report the final distribution we actually achieved
    final_counts = defaultdict(int)
    for _, moves, _ in chosen:
        final_counts[moves] += 1
    print("Final selected counts per move:")
    for m in move_keys:
        print(f"  moves={m}: {final_counts[m]} selected")

In [5]:
# Run the pipeline: reads INFILE, writes OUTFILE, and prints the per-move
# availability and the final selection counts.
main()

Wall-free puzzles with moves in [2,45]: 476095
Availability per move count:
  moves=2: 8 available
  moves=3: 128 available
  moves=4: 767 available
  moves=5: 3561 available
  moves=6: 10629 available
  moves=7: 21266 available
  moves=8: 34032 available
  moves=9: 48301 available
  moves=10: 59777 available
  moves=11: 61984 available
  moves=12: 53381 available
  moves=13: 41957 available
  moves=14: 31534 available
  moves=15: 24407 available
  moves=16: 19192 available
  moves=17: 14696 available
  moves=18: 11643 available
  moves=19: 9086 available
  moves=20: 7151 available
  moves=21: 5584 available
  moves=22: 4212 available
  moves=23: 3280 available
  moves=24: 2370 available
  moves=25: 1792 available
  moves=26: 1323 available
  moves=27: 995 available
  moves=28: 748 available
  moves=29: 586 available
  moves=30: 391 available
  moves=31: 316 available
  moves=32: 224 available
  moves=33: 176 available
  moves=34: 126 available
  moves=35: 114 available
  moves=36: 84 