# Imports

In [1]:
import concurrent.futures
import io
import os
import re
import shlex
import socket
import subprocess
from pathlib import Path

import numpy as np
from tqdm.notebook import tqdm

import pandas as pd
import psutil

# Parameters

In [2]:
# Name of this notebook; the output directory below is derived from it.
NOTEBOOK_NAME = "generate_difficult_sudokus"

In [3]:
# Directory named after the notebook, resolved to an absolute path from the
# current working directory and created if it does not exist yet.
NOTEBOOK_PATH = Path(NOTEBOOK_NAME).resolve()
NOTEBOOK_PATH.mkdir(exist_ok=True)
NOTEBOOK_PATH

PosixPath('/home/kimlab1/strokach/workspace/proteinsolver/notebooks/generate_difficult_sudokus')

In [4]:
hostname = socket.gethostname()

# Choose the number of parallel workers from the most specific source
# available. The branches form one if/elif/else chain so that the generic
# fallback can never overwrite a value chosen by an earlier branch.
if hostname == "strokach-cloudtop":
    # Fixed worker count for this specific host.
    NPROC = 16
elif "SLURM_JOB_CPUS_PER_NODE" in os.environ:
    # Inside a SLURM job: use the CPU count SLURM exposes to the job.
    NPROC = int(os.environ["SLURM_JOB_CPUS_PER_NODE"])
else:
    # Physical core count; psutil returns None when it cannot be determined,
    # so fall back to the logical count and finally to a single worker.
    NPROC = psutil.cpu_count(logical=False) or os.cpu_count() or 1

NPROC

6

In [5]:
# Shift the first batch index by 1000 for every SLURM array task id; when the
# variable is not set the task id defaults to 0 and no shift is applied.
offset = 1000 * int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
START_BATCH_IDX = 0 + offset  # 0 is the base index before the shift
START_BATCH_IDX

0

# Functions

In [6]:
def sudoku_is_solved(values) -> bool:
    """Check whether 81 values form a completely and correctly solved sudoku.

    Args:
        values: Array that can be reshaped to 9x9 (row-major order).

    Returns:
        True if every 3x3 box, every row and every column contains each of
        the digits 1 through 9 exactly once; False otherwise.
    """
    expected = np.arange(1, 10)
    board = values.reshape(9, 9)

    # Collect all 27 units that must each hold the digits 1..9.
    boxes = [
        board[top : top + 3, left : left + 3]
        for top in (0, 3, 6)
        for left in (0, 3, 6)
    ]
    rows = [board[k, :] for k in range(9)]
    columns = [board[:, k] for k in range(9)]

    # A unit is valid when its sorted, flattened contents equal 1..9.
    return all(
        (np.sort(unit, axis=None) == expected).all()
        for unit in boxes + rows + columns
    )


def decode_sugen_output(output):
    """Parse text printed by ``sugen`` into a grid and a difficulty score.

    Args:
        output: Decoded text. The first nine lines are read as grid rows of
            single-space-separated cells, where "_" marks an empty cell. The
            last line of the whitespace-stripped text is searched for
            "Difficulty: <digits>".

    Returns:
        Tuple ``(grid, difficulty)``: ``grid`` is a 9x9 integer array with 0
        for empty cells; ``difficulty`` is an int, or None when the last line
        carries no difficulty value.

    Raises:
        ValueError: If a cell is neither "_" nor an integer literal.
    """
    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from NumPy, so the builtin is used directly.
    grid = np.empty((9, 9), dtype=int)
    for i, row in enumerate(output.split("\n")[:9]):
        for j, value in enumerate(row.split(" ")):
            grid[i, j] = 0 if value == "_" else int(value)

    info_string = output.strip().split("\n")[-1]
    # Raw string so that ``\d`` is a regex class, not an invalid str escape.
    match = re.search(r"Difficulty: (\d+)", info_string)
    difficulty = int(match.group(1)) if match else None
    return grid, difficulty


def generate_sudoku():
    """Generate one puzzle with ``sugen`` and solve it with ``sugen solve``.

    Returns:
        Tuple ``(puzzle, solution, difficulty)``. ``puzzle`` is decoded from
        the generator's stdout; ``solution`` and ``difficulty`` are decoded
        from the solver's stdout.

    Raises:
        AssertionError: If the decoded solution fails ``sudoku_is_solved``.
    """
    generate_cmd = shlex.split("sugen -i 5000 -t 10000 generate")
    generated = subprocess.run(generate_cmd, stdout=subprocess.PIPE)

    # The generator's raw stdout is fed unchanged to the solver's stdin.
    solve_cmd = shlex.split("sugen solve")
    solved = subprocess.run(solve_cmd, input=generated.stdout, stdout=subprocess.PIPE)

    # Only the grid is kept from the generator output; the difficulty is
    # taken from the solver output.
    puzzle = decode_sugen_output(generated.stdout.decode())[0]
    solution, difficulty = decode_sugen_output(solved.stdout.decode())

    assert sudoku_is_solved(solution)
    return puzzle, solution, difficulty

In [7]:
# Run the generator once and display the (puzzle, solution, difficulty) tuple.
generate_sudoku()

(array([[2, 0, 0, 5, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 3, 8, 0, 0, 0],
        [0, 1, 5, 0, 0, 0, 8, 0, 2],
        [4, 0, 0, 0, 0, 9, 1, 6, 0],
        [0, 0, 3, 0, 0, 0, 2, 0, 0],
        [0, 7, 6, 1, 0, 0, 0, 0, 4],
        [5, 0, 7, 0, 0, 0, 6, 8, 0],
        [0, 0, 0, 8, 7, 0, 0, 0, 0],
        [0, 0, 0, 9, 0, 6, 0, 0, 1]]), array([[2, 3, 8, 5, 6, 1, 4, 9, 7],
        [7, 4, 9, 2, 3, 8, 5, 1, 6],
        [6, 1, 5, 4, 9, 7, 8, 3, 2],
        [4, 5, 2, 7, 8, 9, 1, 6, 3],
        [1, 9, 3, 6, 4, 5, 2, 7, 8],
        [8, 7, 6, 1, 2, 3, 9, 5, 4],
        [5, 2, 7, 3, 1, 4, 6, 8, 9],
        [9, 6, 1, 8, 7, 2, 3, 4, 5],
        [3, 8, 4, 9, 5, 6, 7, 2, 1]]), 653)

# Generate sudokus

In [8]:
def get_output_file(file_index):
    """Return the path ``sudoku_<file_index>.csv`` inside ``NOTEBOOK_PATH``."""
    filename = f"sudoku_{file_index}.csv"
    return NOTEBOOK_PATH / filename

In [9]:
# Begin at the starting batch index and step past every index whose output
# file already exists, so the first unused index is selected.
file_index = START_BATCH_IDX
while True:
    if not get_output_file(file_index).is_file():
        break
    file_index += 1

file_index

0

In [10]:
# Number of sudokus generated per output file: 1,000 for each worker.
batch_size = 1_000 * NPROC

batch_size

6000

In [None]:
# Runs until interrupted: each pass generates one batch of sudokus in a
# thread pool and writes it to the next output file that does not exist yet.
while True:
    results = []
    with concurrent.futures.ThreadPoolExecutor(NPROC) as pool:
        futures = [pool.submit(generate_sudoku) for _ in range(batch_size)]
        completed = concurrent.futures.as_completed(futures)
        for future in tqdm(completed, total=len(futures)):
            puzzle, solution, difficulty = future.result()
            # Flatten each grid in row-major order into one string of digits.
            puzzle_str = "".join(map(str, puzzle.reshape(-1)))
            solution_str = "".join(map(str, solution.reshape(-1)))
            results.append((puzzle_str, solution_str, difficulty))

    output_file = get_output_file(file_index)
    df = pd.DataFrame(results, columns=["puzzle", "solution", "difficulty"])
    print(f"Writing results to file: '{output_file}'.")
    df.to_csv(output_file, sep=",", index=False)

    # Advance to the next index for which no output file exists.
    file_index += 1
    while get_output_file(file_index).is_file():
        file_index += 1