In [9]:
import torch
import numpy as np
from tqdm import tqdm 
import utils
import ebm
import sudoku_dataset as dataset

In [3]:
CONFIG = {
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "model_path": "best_ebm.pt",  # Path to your saved model
    "test_data_path": "dataset_test.npy", # Path to your dataset
    "solver_steps": 2000,          # More steps for higher quality solutions
    "solver_step_size": 0.1,
}

In [4]:
def generate_solution(model, initial_board, puzzle_mask, steps, step_size):
    """
    Uses Langevin dynamics to find a low-energy solution starting from a puzzle.
    """

    x = initial_board.clone().detach().requires_grad_(True)
    model.eval()

    for i in tqdm(range(steps), desc="Solving Puzzle"):
        noise_scale = np.sqrt(step_size) * (1 - i / steps) # Optional: Annealing
        
        energy = model(x).sum()
        grad, = torch.autograd.grad(energy, x)
        
        # Langevin dynamics update
        x.data.add_(-0.5 * step_size * grad) # Gradient descent
        x.data.add_(torch.randn_like(x) * noise_scale) # Add noise
        
        x.data = torch.clamp(x.data, min=0) # Probabilities can't be negative
        mask_expanded = puzzle_mask.unsqueeze(1).expand_as(x)
        x.data[mask_expanded] = initial_board.data[mask_expanded]
        
        x_flat = x.view(-1, 9, 81)
        x_probs = torch.nn.functional.softmax(x_flat, dim=1)
        x.data = x_probs.view_as(x)
        
    return x.detach()


In [7]:

device = torch.device(CONFIG["device"])

model = ebm.SudokuEBM()
model.load_state_dict(torch.load(CONFIG["model_path"], map_location=device))
model.to(device)
model.eval() # IMPORTANT: Set model to evaluation mode



SudokuEBM(
  (net): Sequential(
    (0): Conv2d(9, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): Softplus(beta=1.0, threshold=20.0)
    (4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): Softplus(beta=1.0, threshold=20.0)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=10368, out_features=1, bias=True)
  )
)

In [14]:
sudoku_test = dataset.FileDataset(CONFIG["test_data_path"])

(5000, 2, 9, 9)


In [21]:

n_samples = 10

puzzles = np.array([sudoku_test[i]['puzzle'] for i in range(n_samples)])
solutions = np.array([sudoku_test[i]['solution'] for i in range(n_samples)])

puzzle_one_hot = torch.from_numpy(puzzles).float().to(device)
solution_one_hot = torch.from_numpy(solutions).float().to(device)



In [23]:

### prepare initial data for solves
puzzle_mask = puzzle_one_hot.sum(dim=1) > 0
random_fill = torch.nn.functional.softmax(torch.randn_like(puzzle_one_hot), dim=1)
initial_board = torch.where(puzzle_mask.unsqueeze(1), puzzle_one_hot, random_fill)



In [24]:
solved_one_hot = generate_solution(model, initial_board, puzzle_mask.squeeze(0),
                                    CONFIG["solver_steps"], CONFIG["solver_step_size"])

Solving Puzzle: 100%|██████████| 2000/2000 [00:06<00:00, 311.97it/s]


In [26]:

solved_board = [utils.convert_to_board(vec) for vec in solved_one_hot]
puzzle_board = [utils.convert_to_board(vec) for vec in puzzle_one_hot]
solution_board = [utils.convert_to_board(vec) for vec in solution_one_hot]




In [29]:
print(puzzle_board[0])

[[1 1 1 1 1 1 1 1 9]
 [6 5 1 1 9 1 1 1 8]
 [8 4 9 7 5 2 3 1 1]
 [1 1 4 1 8 3 2 1 1]
 [5 1 2 1 1 6 1 9 1]
 [9 1 1 1 1 5 1 4 7]
 [1 3 1 1 1 1 9 1 6]
 [2 7 6 5 3 1 1 1 1]
 [1 1 1 1 1 7 5 3 1]]


In [27]:
print(solved_board[0])

[[7 6 6 6 6 1 6 6 9]
 [6 5 1 6 9 6 6 6 8]
 [8 4 9 7 5 2 3 6 1]
 [6 6 4 6 8 3 2 1 6]
 [5 6 2 6 6 6 6 9 6]
 [9 6 6 6 1 5 6 4 7]
 [6 3 6 6 6 6 9 6 6]
 [2 7 6 5 3 6 1 6 6]
 [2 2 6 6 6 7 5 3 6]]


In [28]:
print(solution_board[0])

[[3 2 7 8 6 1 4 5 9]
 [6 5 1 3 9 4 7 2 8]
 [8 4 9 7 5 2 3 6 1]
 [7 6 4 9 8 3 2 1 5]
 [5 1 2 4 7 6 8 9 3]
 [9 8 3 2 1 5 6 4 7]
 [4 3 5 1 2 8 9 7 6]
 [2 7 6 5 3 9 1 8 4]
 [1 9 8 6 4 7 5 3 2]]
