In [2]:
import pandas as pd
import numpy as np

path = 'sudoku.csv'

df = pd.read_csv(path)

# Some releases of the dataset name the columns "puzzle"/"solution" instead of
# "quizzes"/"solutions". Normalise to the latter only when both alternative
# columns are actually present, rather than swallowing every exception.
if {"puzzle", "solution"}.issubset(df.columns):
    df = pd.DataFrame({"quizzes": df["puzzle"], "solutions": df["solution"]})
df.head()

Unnamed: 0,quizzes,solutions
0,0700000430400096108006349000940520003584600200...,6795182435437296188216349577943521863584617292...
1,3010865040465210705000000014008000020803479000...,3719865248465213795924738614638197522853479167...
2,0483015603600080909106700030200009355090102006...,7483915623652487919126754834217869355894132766...
3,0083170000042051090000400703271609049014500000...,2983176457642851391539462783271689549814537266...
4,0408906300001368208007405190004670524500207002...,1428956379751368248367425193984671524513287962...


In [12]:
# Convert to 9x9 matrix
def sudoku_board(quiz_string):
    """Split a string of digits into rows of (at most) nine integers.

    Parameters
    ----------
    quiz_string : str
        Digit characters in row-major order. An 81-character string yields
        a 9x9 grid.

    Returns
    -------
    list[list[int]]
        One inner list per consecutive run of nine characters; the last
        row is shorter when the length is not a multiple of nine.

    Raises
    ------
    ValueError
        If a character cannot be converted with ``int``.
    """
    rows = []
    for offset in range(0, len(quiz_string), 9):
        chunk = quiz_string[offset:offset + 9]
        rows.append([int(ch) for ch in chunk])
    return rows

In [13]:
def print_board(board):
    """Print the board one row per line with cells separated by spaces.

    After the last row an extra ``"\\n"`` is printed, which leaves blank
    lines between consecutive boards.
    """
    for cells in board:
        # print() applies str() to every argument and joins them with a space
        print(*cells)
    print("\n")

In [14]:
# Checks if the move is valid
def is_valid(board, row, col, num):
    """Return True when ``num`` can be placed at ``board[row][col]``.

    A placement is rejected when ``num`` already appears in the same row,
    the same column, or the 3x3 box containing the cell. The board is not
    modified.
    """
    # Row and column are scanned together, one index at a time
    if any(board[row][k] == num or board[k][col] == num for k in range(9)):
        return False

    # Top-left corner of the 3x3 box that contains (row, col)
    box_top = row - row % 3
    box_left = col - col % 3

    return not any(
        board[r][c] == num
        for r in range(box_top, box_top + 3)
        for c in range(box_left, box_left + 3)
    )

In [15]:
# Recursive function with backtracking
def solve_board(board, iteration=0):
    """Solve a 9x9 Sudoku in place using depth-first backtracking.

    Parameters
    ----------
    board : list[list[int]]
        9x9 grid where ``0`` marks an empty cell. It is mutated in place:
        on success it holds the completed grid; on failure every cell that
        was tried is reset to ``0``.
    iteration : int, optional
        Accepted only for backward compatibility with existing callers.
        The value was never observable outside the function, so it is
        ignored.

    Returns
    -------
    bool
        True when no empty cell remains (the board is filled), False when
        no digit fits the first empty cell or every candidate leads to a
        dead end.
    """
    # First empty cell in row-major order, or None when the board is full
    empty = next(
        ((r, c) for r in range(9) for c in range(9) if board[r][c] == 0),
        None,
    )
    if empty is None:
        return True

    r, c = empty
    for num in range(1, 10):
        if is_valid(board, r, c, num):
            board[r][c] = num

            # Recursively solve the rest of the board
            if solve_board(board):
                return True

            # Undo the placement: this digit leads to a dead end
            board[r][c] = 0

    # No digit fits this cell, so the caller has to backtrack
    return False

In [16]:
# Compares the computed solved board with the solution provided
def check_answer(board, solution):
    """Return whether the flattened board equals the ``solution`` string.

    The board is flattened in row-major order, each cell converted with
    ``str`` and concatenated without separators before the comparison.
    """
    flattened = []
    for row in board:
        flattened.extend(map(str, row))
    return "".join(flattened) == solution

In [17]:
# Testing with df

import datetime

quiz = df['quizzes'][1]
solution = df['solutions'][1]

test = sudoku_board(quiz)

start_time = datetime.datetime.now()
solved = solve_board(test, 1)
end_time = datetime.datetime.now()
# Compute the elapsed time before branching so that every outcome,
# including "No solution", has a value to report.
time_taken = (end_time - start_time).total_seconds()

if not solved:
    print("No solution")
elif check_answer(test, solution):
    test2 = ''.join(str(cell) for row in test for cell in row)
    print(test2)
    print("Puzzle solved correctly!")
else:
    print("Incorrect!")
print(f"Time taken: {time_taken:.2f} seconds")

371986524846521379592473861463819752285347916719652438634195287128734695957268143
Puzzle solved correctly!
Time taken: 0.00 seconds


In [18]:
# Testing with the 'hardest' sudoku puzzle
import time

quiz2 = '800000000003600000070090200050007000000045700000100030001000068008500010090000400'

board = sudoku_board(quiz2)

# Time a single run of the backtracking solver on this puzzle
start_time = datetime.datetime.now()
if not solve_board(board, 0):
    print("No solution")
else:
    end_time = datetime.datetime.now()
    time_taken = (end_time - start_time).total_seconds()
    # Flatten the solved grid back into an 81-character string
    answer = ''.join(map(str, (cell for row in board for cell in row)))
    print(answer)
    print(f"Time taken: {time_taken:.2f} seconds")

812753649943682175675491283154237896369845721287169534521974368438526917796318452
Time taken: 0.43 seconds


Comparing the output above with the solution published at https://andrewspuzzles.blogspot.com/2021/05/solution-to-worlds-hardest-sudoku.html, we can see that the solver's answer is correct.

In [19]:
# Function to compare and solve Sudoku boards and measure average time
def compare_sudoku(df):
    """Solve every puzzle in ``df`` and print accuracy and timing statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``quizzes`` column and a ``solutions`` column whose
        values are digit strings accepted by ``sudoku_board``.

    Returns
    -------
    None
        The summary (counts of correct / incorrect / unsolved puzzles, total
        solving time and average time per puzzle) is printed to stdout.

    Notes
    -----
    Only the call to ``solve_board`` is timed, and it is timed for every
    puzzle, solved or not, so the average is consistent with the number of
    puzzles it is divided by.
    """
    total_quizzes = len(df)
    correct_solved = 0
    incorrect_solved = 0
    not_solved = 0
    total_time = 0.0

    # Iterating over the two columns directly avoids the per-row Series
    # that DataFrame.iterrows() builds.
    for quiz, solution in zip(df['quizzes'], df['solutions']):
        # sudoku_board builds fresh lists, so the strings in df are untouched
        quiz_board = sudoku_board(quiz)
        solution_board = sudoku_board(solution)

        # perf_counter is the clock intended for measuring short durations
        start_time = time.perf_counter()
        solved = solve_board(quiz_board, 1)
        total_time += time.perf_counter() - start_time

        if not solved:
            not_solved += 1
        elif quiz_board == solution_board:
            correct_solved += 1
        else:
            incorrect_solved += 1

    # Guard against an empty frame
    average_time = total_time / total_quizzes if total_quizzes > 0 else 0

    # Print the results
    print(f"Total quizzes: {total_quizzes}")
    print(f"Correctly solved: {correct_solved}")
    print(f"Incorrectly solved: {incorrect_solved}")
    print(f"Could not be solved: {not_solved}")
    print(f"Total time: {total_time:.4f} seconds")
    print(f"Average time per quiz: {average_time:.4f} seconds")


In [20]:
# Benchmark the solver on the first 2000 rows of the dataset and print the summary
compare_sudoku(df.head(2000))


Total quizzes: 2000
Correctly solved: 2000
Incorrectly solved: 0
Could not be solved: 0
Total time per quiz: 15.5826 seconds
Average time per quiz: 0.0078 seconds


True