In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

In [None]:
path = "/home/djunice/EnigmaGridSolver/Sudoku.NeuralNet/Resources/1milsudoku/"
data = pd.read_csv(path+"sudoku.csv")

# Different exports of the dataset name their columns differently
# ("puzzle"/"solution" vs "quizzes"/"solutions"). Normalise to the names the
# rest of the notebook uses. An alias is only renamed when the target column
# is not already present, so no duplicate columns can be created.
column_aliases = {"puzzle": "quizzes", "solution": "solutions"}
data = data.rename(columns={
    old: new
    for old, new in column_aliases.items()
    if old in data.columns and new not in data.columns
})

# Fail here, with a clear message, instead of silently continuing and hitting
# a confusing KeyError in a later cell.
missing_columns = {"quizzes", "solutions"} - set(data.columns)
if missing_columns:
    raise KeyError(
        "sudoku.csv is missing required column(s) %s; found columns: %s"
        % (sorted(missing_columns), list(data.columns))
    )
data.head()

In [None]:
# Keep at most the first 1,000,000 rows of the dataset.
data = data.iloc[:1_000_000]

In [None]:
# Summarise the DataFrame: column names, dtypes, non-null counts and memory usage.
data.info()

In [None]:
# Display the first puzzle and its solution as 9x9 integer grids.
for label, column in (("Quiz:\n", 'quizzes'), ("Solution:\n", 'solutions')):
    cell_values = [int(ch) for ch in data[column][0]]
    print(label, np.array(cell_values).reshape(9, 9))

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that converts rows of a Sudoku DataFrame into batches.

    Every quiz string (81 digit characters) becomes a ``(9, 9, 1)`` array with
    values ``digit / 9 - 0.5``. Every solution string becomes an ``(81, 1)``
    array of class indices ``digit - 1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``'quizzes'`` column and, when ``subset == 'train'``,
        a ``'solutions'`` column.
    batch_size : int
        Maximum number of rows per batch; the final batch may be smaller.
    subset : str
        ``'train'`` makes ``__getitem__`` return ``(X, y)``; any other value
        makes it return ``X`` only.
    shuffle : bool
        If true, the row order is reshuffled at the end of every epoch.
    info : dict or None
        Optional dict that receives the raw strings of the rows served so far
        (see ``__getitem__``). A fresh dict is created per instance when omitted,
        so separate generators never share state.
    """

    def __init__(self, df, batch_size=64, subset="train", shuffle=False, info=None):
        super().__init__()
        self.df = df
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.subset = subset
        # A mutable default argument would be shared by every instance;
        # allocate a private dict unless the caller supplied one.
        self.info = {} if info is None else info

        self.data_path = path
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.df) / self.batch_size))

    def on_epoch_end(self):
        """Rebuild the row order, shuffling it when ``self.shuffle`` is set."""
        self.indexes = np.arange(len(self.df))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        """Return batch number ``index``.

        Returns ``(X, y)`` when ``subset == 'train'`` and ``X`` otherwise.
        ``X`` has shape ``(n, 9, 9, 1)`` and ``y`` has shape ``(n, 81, 1)``,
        where ``n`` is the number of rows actually in the batch.
        """
        start = index * self.batch_size
        indexes = self.indexes[start:start + self.batch_size]
        # Size the arrays by the rows really available: the last batch is
        # usually shorter than batch_size, and padding it with np.empty would
        # feed uninitialised inputs and labels to the model.
        n_rows = len(indexes)

        X = np.empty((n_rows, 9, 9, 1))
        for i, f in enumerate(self.df['quizzes'].iloc[indexes]):
            self.info[start + i] = f
            X[i,] = (np.array(list(map(int, list(f)))).reshape((9, 9, 1)) / 9) - 0.5

        if self.subset != 'train':
            return X

        y = np.empty((n_rows, 81, 1))
        for i, f in enumerate(self.df['solutions'].iloc[indexes]):
            # Same key as the quiz above, so in 'train' mode the entry ends up
            # holding the solution string (behaviour kept as it was).
            self.info[start + i] = f
            y[i,] = np.array(list(map(int, list(f)))).reshape((81, 1)) - 1
        return X, y

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, Flatten, Dense, Reshape, Activation

# def create_sudoku_model(input_shape=(9, 9, 1)):
#     model = Sequential()
#     model.add(Conv2D(128, 3, activation='relu', padding='same', input_shape=input_shape))
#     model.add(BatchNormalization())
#     model.add(Conv2D(128, 3, activation='relu', padding='same'))
#     model.add(BatchNormalization())
#     model.add(Conv2D(256, 3, activation='relu', padding='same'))
#     model.add(BatchNormalization())
#     model.add(Conv2D(256, 3, activation='relu', padding='same'))
#     model.add(BatchNormalization())
#     model.add(Conv2D(512, 3, activation='relu', padding='same'))
#     model.add(BatchNormalization())
#     model.add(Conv2D(512, 3, activation='relu', padding='same'))
#     model.add(BatchNormalization())
#     model.add(Conv2D(1024, 3, activation='relu', padding='same'))
#     model.add(BatchNormalization())
#     model.add(Conv2D(9, 1, activation='relu', padding='same'))
    
#     # Flatten and dense layers
#     model.add(Flatten())
#     model.add(Dense(81*9))
#     model.add(Reshape((-1, 9)))
#     model.add(Activation('softmax'))
    
#     return model

# # Create the model
# model = create_sudoku_model()

# Resume from a previously saved model file instead of building a fresh network.
# NOTE(review): the commented-out create_sudoku_model above is presumably the
# architecture stored in this file -- confirm against model.summary().
model = tf.keras.models.load_model('/home/djunice/EnigmaGridSolver/Sudoku.NeuralNet/Resources/tf3-1.h5')

In [None]:
# Print the layer-by-layer summary of the loaded model.
model.summary()

### Data Generators
We create a training and a validation data generator. Let's use 95% of the data for training and 5% for validation: with up to 1 million puzzles kept above, 5% (about 50,000 puzzles) is still plenty for validation purposes.

In [None]:
# Shuffle with a fixed seed so that a fresh "Restart & Run All" always yields
# the same train/validation split. This matters because training resumes from
# a saved model: an unseeded shuffle would move rows between the training and
# validation sets from one session to the next.
SPLIT_SEED = 42
train_idx = int(len(data)*0.95)
data = data.sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)

# The validation generator also uses subset="train" so that it yields (X, y)
# pairs, which Keras needs to compute val_loss.
training_generator = DataGenerator(data.iloc[:train_idx], subset="train", batch_size=64)
validation_generator = DataGenerator(data.iloc[train_idx:], subset="train", batch_size=64)

In [None]:
# Sanity check: shape of the input array (X) of the fifth training batch.
training_generator[4][0].shape

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping

# Lower the learning rate (never below 1e-6) once val_loss has not improved
# for 2 consecutive epochs.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=2, min_lr=1e-6, verbose=1)

# Stop training once val_loss has not improved for 5 consecutive epochs and
# roll the model back to the weights of its best epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)


In [None]:
# (Re)compile the loaded model. The targets are integer class indices of shape
# (batch, 81, 1), hence the sparse categorical cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# No batch_size here: the generators are keras Sequences that already yield
# complete batches, and Keras documents that batch_size must not be specified
# for such inputs (some versions raise an error when it is).
history = model.fit(
    training_generator,
    validation_data=validation_generator,
    epochs=10,
    verbose=1,
    callbacks=[reduce_lr, early_stop],
)

In [None]:
# Save the trained model (architecture and weights) to a single file.
# The path is relative, so the file is written to the current working directory.
model.save("tf3-2.h5")

## <span style="color:teal;text-decoration:underline">Solving Real Sudokus</span>

In [None]:
def solve_sudoku_with_nn(model, puzzle):
    """Solve a Sudoku by letting the network fill in one cell at a time.

    On every step the current board is fed to ``model``; among the cells that
    are still empty, the one whose top prediction has the highest probability
    (rounded to two decimals, ties resolved in row-major order) is filled with
    that prediction. This repeats until no empty cell is left.

    Parameters
    ----------
    model : object
        Must provide ``predict(x, verbose=0)`` that accepts an array of shape
        ``(1, 9, 9, 1)`` holding ``digit / 9 - 0.5`` and returns per-cell
        scores that squeeze to shape ``(81, 9)`` (one score per digit 1-9).
    puzzle : str
        81 digits, ``0`` marking an empty cell. Any whitespace is ignored.

    Returns
    -------
    str
        The completed board as an 81-character digit string.

    Raises
    ------
    ValueError
        If ``puzzle`` does not contain exactly 81 digits after whitespace
        has been removed.
    """
    digits = ''.join(puzzle.split())
    if len(digits) != 81 or any(ch not in '0123456789' for ch in digits):
        raise ValueError(
            "puzzle must contain exactly 81 digits (0 = empty cell), got %d "
            "non-whitespace characters" % len(digits)
        )

    # Keep the board as integers and normalise a copy only for the network.
    # Round-tripping the board itself through floats would rely on exact
    # floating-point inversion before truncating back to int.
    board = np.array([int(ch) for ch in digits]).reshape((9, 9))

    while True:
        empty = (board == 0)
        if not empty.any():
            # Puzzle is solved; checked first so that no prediction is wasted
            # on an already complete board.
            break

        net_input = ((board / 9) - 0.5).reshape((1, 9, 9, 1))
        # verbose=0 keeps the notebook output free of one progress bar per step.
        predictions = np.asarray(model.predict(net_input, verbose=0)).squeeze()
        pred = np.argmax(predictions, axis=1).reshape((9, 9)) + 1
        prob = np.around(np.max(predictions, axis=1).reshape((9, 9)), 2)

        # Exclude filled cells with -inf rather than multiplying by the mask:
        # the argmax then always lands on an empty cell, so every iteration
        # fills one cell and the loop is guaranteed to terminate.
        candidates = np.where(empty, prob, -np.inf)
        x, y = divmod(int(np.argmax(candidates)), 9)
        board[x, y] = pred[x, y]

    # Convert the solved puzzle back to a string representation
    return ''.join(map(str, board.flatten()))

You can solve any puzzle by putting it in the `game` string: copy the blank `new_game` template into `game` and replace the zeros of the given cells with their digits (a `0` marks an empty cell).

In [None]:
def print_sudoku_grid(puzzle):
    """Print an 81-character puzzle string as a 9x9 grid with 3x3 box separators.

    Spaces and newlines in ``puzzle`` are ignored. Each cell is followed by a
    space, vertical bars separate the box columns and a dashed line separates
    the box rows.
    """
    cells = puzzle.replace('\n', '').replace(' ', '')
    for pos in range(81):
        row, col = divmod(pos, 9)
        # Dashed rule above the 4th and 7th rows.
        if col == 0 and row in (3, 6):
            print("-" * 21)
        # Vertical bar before the 4th and 7th columns.
        if col in (3, 6):
            print("|", end=" ")
        print(cells[pos], end=" ")
        # Finish the line after the last column.
        if col == 8:
            print()
# Blank template: 9 rows of 9 digits, where 0 marks an empty cell.
# Copy it into `game` below and fill in the given digits of a puzzle.
new_game = '''
          0 0 0 0 0 0 0 0 0
          0 0 0 0 0 0 0 0 0
          0 0 0 0 0 0 0 0 0
          0 0 0 0 0 0 0 0 0
          0 0 0 0 0 0 0 0 0
          0 0 0 0 0 0 0 0 0
          0 0 0 0 0 0 0 0 0
          0 0 0 0 0 0 0 0 0
          0 0 0 0 0 0 0 0 0
      '''

# The puzzle to solve. Spaces and newlines are stripped before parsing, so the
# grid layout here is purely for readability.
game = '''
          0 0 0 7 0 0 0 9 6
          0 0 3 0 6 9 1 7 8
          0 0 7 2 0 0 5 0 0
          0 7 5 0 0 0 0 0 0
          9 0 1 0 0 0 3 0 0
          0 0 0 0 0 0 0 0 0
          0 0 9 0 0 0 0 0 1
          3 1 8 0 2 0 4 0 7
          2 4 0 0 0 5 0 0 0
      '''

# Let the network fill in the empty cells; the result is an 81-character string.
solved_puzzle_nn = solve_sudoku_with_nn(model, game)

# Print the solved puzzle as a grid
print("Sudoku Solution (NN):")
print_sudoku_grid(solved_puzzle_nn)