In [1]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [2]:
# Loading test puzzles
# The pickle is unpacked into two objects: the unsolved puzzles and their solutions.
# NOTE: pickle.load can execute arbitrary code - only open pickle files from a trusted source.
with open('sudoku_test.pkl', 'rb') as f:
    unsolved_test, solved_test = pickle.load(f)

In [3]:
# Loading train puzzles
# The pickle is unpacked into two objects: the unsolved puzzles and their solutions.
# NOTE: pickle.load can execute arbitrary code - only open pickle files from a trusted source.
with open('sudoku.pkl', 'rb') as f:
    unsolved, solved = pickle.load(f)

## Creating and training model

In [4]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

In [9]:
def cnn_solver_enhanced(x, y):
    '''Build, train and save a sudoku solver with three convolutional layers.

    The number of filters doubles from layer to layer (64 -> 128 -> 256). The
    convolutional output is flattened, passed through a dense layer with 729
    units and reshaped to (81, 9), so the softmax yields one distribution over
    nine classes for each of the 81 cells.

    Parameters
    ----------
    x : training puzzles; the first layer expects samples of shape (9, 9, 1).
    y : integer class labels for 'sparse_categorical_crossentropy'
        (presumably shape (n, 81, 1) with values 0-8 - confirm against the
        data preparation cell).

    Returns
    -------
    None. The fitted model is saved to 'cnn_sudoku_model_enhanced'.
    '''
    # Set up the model
    solver = models.Sequential()
    # Add convolutional layer with 64 filters
    solver.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(9, 9, 1), padding='same'))
    solver.add(layers.BatchNormalization())
    # Add convolutional layer with 128 filters
    solver.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    solver.add(layers.BatchNormalization())
    # Add convolutional layer with 256 filters (1x1 kernel: mixes channels per cell)
    solver.add(layers.Conv2D(256, (1, 1), activation='relu', padding='same'))

    # Prepare output of convolutional layers for densely connected network by flattening out data
    solver.add(layers.Flatten())
    # Add densely connected neural network: 729 = 81 cells * 9 classes
    solver.add(layers.Dense(729))
    solver.add(layers.Reshape((-1, 9)))
    # Softmax over the last axis gives per-cell class probabilities
    solver.add(layers.Activation('softmax'))

    # Choose optimizer and fit model.
    # The learning rate is passed positionally because the keyword was renamed
    # from `lr` to `learning_rate` between Keras releases.
    optimizer_ad = tf.keras.optimizers.Adam(0.001)
    # The keyword must be exactly `optimizer`; a misspelt keyword leaves the
    # model without the configured optimizer and makes compile() fail.
    solver.compile(optimizer=optimizer_ad, loss='sparse_categorical_crossentropy')
    solver.fit(x, y, batch_size=100, epochs=2)

    # Save model
    solver.save('cnn_sudoku_model_enhanced')


In [250]:
def cnn_solver_reduced(x, y):
    '''Build, train and save a sudoku solver with two convolutional layers.

    Both convolutional layers use 64 filters. The convolutional output is
    flattened, passed through a dense layer with 729 units and reshaped to
    (81, 9), so the softmax yields one distribution over nine classes for each
    of the 81 cells. Training runs for 3 epochs.

    Parameters
    ----------
    x : training puzzles; the first layer expects samples of shape (9, 9, 1).
    y : integer class labels for 'sparse_categorical_crossentropy'
        (presumably shape (n, 81, 1) with values 0-8 - confirm against the
        data preparation cell).

    Returns
    -------
    None. The fitted model is saved to 'cnn_sudoku_model_reduced'.
    '''
    # Set up the model
    solver = models.Sequential()
    # Add convolutional layer with 64 filters
    solver.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(9, 9, 1), padding='same'))
    solver.add(layers.BatchNormalization())
    # Add convolutional layer with 64 filters (1x1 kernel: mixes channels per cell)
    solver.add(layers.Conv2D(64, (1, 1), activation='relu', padding='same'))

    # Prepare output of convolutional layers for densely connected network by flattening out data
    solver.add(layers.Flatten())
    # Add densely connected neural network: 729 = 81 cells * 9 classes
    solver.add(layers.Dense(729))
    solver.add(layers.Reshape((-1, 9)))
    # Softmax over the last axis gives per-cell class probabilities
    solver.add(layers.Activation('softmax'))

    # Choose optimizer and fit model.
    # The learning rate is passed positionally because the keyword was renamed
    # from `lr` to `learning_rate` between Keras releases.
    optimizer_ad = tf.keras.optimizers.Adam(0.001)
    solver.compile(optimizer=optimizer_ad, loss='sparse_categorical_crossentropy')
    solver.fit(x, y, batch_size=100, epochs=3)

    # Save model
    solver.save('cnn_sudoku_model_reduced')

In [5]:
# Prepare data for model
# Inputs: every cell value v is scaled to v/9 - 0.5 and each puzzle is reshaped to (9, 9, channels).
# A cell holding 0 therefore becomes -0.5, the value predict_sudoku treats as "missing".
# Targets: values are shifted by -1 (presumably digits 1-9 -> class indices 0-8, matching the
# 9-way softmax - confirm against the data) and each solution is reshaped to 81 rows, one per cell.
unsolved_prepared = np.asarray([np.asarray((x/9) - 0.5).reshape(9,9,-1) for x in unsolved])
solved_prepared = np.asarray([np.asarray(x-1).reshape(81,-1) for x in solved])

# Same encoding for the test puzzles
unsolved_test_prepared = np.asarray([np.asarray((x/9) - 0.5).reshape(9,9,-1) for x in unsolved_test])
solved_test_prepared = np.asarray([np.asarray(x-1).reshape(81,-1) for x in solved_test])

In [9]:
# Enhanced solver training on the prepared training puzzles;
# the function also saves the fitted model to 'cnn_sudoku_model_enhanced'
cnn_solver_enhanced(unsolved_prepared, solved_prepared)

Epoch 1/2
Epoch 2/2


In [251]:
# Reduced solver training on the prepared training puzzles;
# the function also saves the fitted model to 'cnn_sudoku_model_reduced'
cnn_solver_reduced(unsolved_prepared, solved_prepared)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [6]:
def predict_sudoku(puzzle, model):
    '''Solve one sudoku puzzle by filling a single cell per model call.

    On every iteration the model predicts all cells, and only the missing cell
    with the highest predicted probability is filled in. The updated puzzle is
    then fed back to the model until no missing cell remains.

    Parameters
    ----------
    puzzle : array with 81 cells (e.g. shape (9, 9, 1)) encoded as
        value/9 - 0.5, so that a missing cell (value 0) equals -0.5.
        The argument is NOT modified; the function works on a copy.
    model : object whose predict() accepts an array of shape (1, 9, 9, 1) and
        returns, per cell, nine class probabilities (81 x 9 after squeeze).

    Returns
    -------
    numpy array of shape (9, 9) holding the decoded cell values as floats.
    '''
    # Work on a private float copy. The callers pass views of the test set, so
    # writing into the argument would overwrite the unsolved puzzles and make
    # any later evaluation run on already-filled grids.
    puzzle = np.array(puzzle, dtype=float, copy=True)

    # Map missing values
    map_missing = ((puzzle == -0.5).reshape(9, 9))

    # Loop while there are values to be filled in the puzzle
    while map_missing.sum() > 0:
        # Let model make a prediction: one row of 9 class probabilities per cell
        output = model.predict(puzzle.reshape(-1, 9, 9, 1)).squeeze()

        # Maximum probability of each cell, rounded to 3 decimals
        max_probability = np.around(np.max(output, axis=1).reshape((9, 9)), 3)
        # Corresponding predicted number of each cell (class index + 1)
        predictions = np.argmax(output, axis=1).reshape(9, 9) + 1

        # Zero out the cells that are already known so only missing ones compete
        map_missing_prob = max_probability * map_missing

        # Find the position where the model is most certain about the predicted value
        index_max_prob = np.argmax(map_missing_prob)

        # Convert the flat index to row / column coordinates
        x = index_max_prob // 9
        y = index_max_prob % 9

        # Select predicted value which will be inserted into the solution
        predicted_value = predictions[x][y]

        # Add predicted value (in the normalized encoding) to the correct position
        puzzle[x][y] = (predicted_value / 9) - 0.5
        # Update map of missing values
        map_missing = ((puzzle == -0.5).reshape(9, 9))

    # Convert normalized values back to numbers; rint removes any floating-point
    # drift so that exact comparisons against integer grids are reliable
    puzzle_with_numbers = np.rint((puzzle + 0.5) * 9)

    return puzzle_with_numbers.reshape(9, 9)

In [7]:
def acc(x_test, y_test, model):
    '''Return the share of puzzles whose predicted grid equals the reference grid.

    Each puzzle in x_test is solved with predict_sudoku and compared, element
    by element, with the entry of y_test at the same position. The running
    index is printed every 100 puzzles as a progress indicator.
    '''
    n_exact = 0

    for idx, puzzle in enumerate(x_test):
        solved_grid = predict_sudoku(puzzle, model)

        # A puzzle counts only when every cell matches the reference solution
        n_exact += int(np.array_equal(solved_grid, y_test[idx]))

        # Progress output
        if not idx % 100:
            print(idx)

    return n_exact / len(x_test)

## Model evaluation

In [8]:
# Load enhanced model (saved by cnn_solver_enhanced)
model_enhanced = models.load_model('cnn_sudoku_model_enhanced')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [9]:
# Load reduced model (saved by cnn_solver_reduced)
model_reduced = models.load_model('cnn_sudoku_model_reduced')

In [255]:
# Enhanced model evaluation: share of test puzzles whose predicted grid
# exactly equals the stored solution
acc(unsolved_test_prepared, solved_test, model_enhanced)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900


0.7282

In [16]:
# Reduced model evaluation: share of test puzzles whose predicted grid
# exactly equals the stored solution
acc(unsolved_test_prepared, solved_test, model_reduced)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900


0.7148

### Insight
The enhanced model reached an accuracy of 72.82 % and the reduced model 71.48 %. This is not much. However, a puzzle was counted as solved only if the predicted grid exactly matched the corresponding stored test solution. As the puzzles were randomly generated, a single puzzle may have more than one correct solution. In the upcoming sections each predicted sudoku is therefore checked against the actual game rules.



In [12]:
def check_grid_by_rules(grid):
    '''Check a filled 9x9 grid against the sudoku rules.

    Returns 1 when every number from 1 to 9 occurs exactly once in each row,
    each column and each 3x3 box of the grid, otherwise 0.
    '''
    columns = np.transpose(grid)
    # Cut the grid into three 9x3 stacks, then every stack into three 3x3 boxes
    boxes = [box for stack in np.hsplit(grid, 3) for box in np.split(stack, 3)]

    def appears_once(cells, digit):
        # True when `digit` is present exactly one time in `cells`
        return np.isin(cells, digit).sum() == 1

    for digit in range(1, 10):
        # Rows and columns
        for k in range(9):
            if not (appears_once(grid[k], digit) and appears_once(columns[k], digit)):
                return 0
        # 3x3 boxes
        if not all(appears_once(box, digit) for box in boxes):
            return 0

    return 1

In [14]:
def sudoku_correct_check(games, model):
    '''Print the share of puzzles whose predicted grid obeys the sudoku rules.

    Every puzzle in games is solved with predict_sudoku and the result is
    validated with check_grid_by_rules (1 for a valid grid, 0 otherwise).
    '''
    n_valid = sum(check_grid_by_rules(predict_sudoku(game, model)) for game in games)

    print(n_valid / games.shape[0])

### Check models by sudoku rules

In [124]:
# Enhanced model: share of the first 1000 test puzzles whose predicted grid obeys the sudoku rules
sudoku_correct_check(unsolved_test_prepared[0:1000], model_enhanced)

0.972


In [15]:
# Reduced model: share of the first 1000 test puzzles whose predicted grid obeys the sudoku rules
sudoku_correct_check(unsolved_test_prepared[0:1000], model_reduced)

0.955
