In [1]:
import numpy as np
import pandas as pd

import tensorflow
from tensorflow.keras import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input
from tensorflow.keras.utils import to_categorical

In [2]:
''' defining some useful functions '''

def loading_data(train=40000, test=10000, full=False, path='sudoku.csv'):
    """Load sudoku quizzes and solutions from a CSV file and split them.

    The file is read with pandas' default header handling and must contain
    exactly two columns: the quiz and its solution. Every value is a string of
    digits that reshapes to a 9x9 grid.

    Parameters
    ----------
    train : int
        Number of leading rows returned as the training split.
    test : int
        Number of rows read after the training rows. Ignored when `full` is
        True, in which case every remaining row lands in the test split.
    full : bool
        Read the whole file instead of only the first `train + test` rows.
    path : str
        Location of the CSV file.

    Returns
    -------
    tuple
        ``((X_train, y_train), (X_test, y_test))`` where the X arrays hold the
        quizzes and the y arrays the solutions, as integer grids of shape
        (n, 9, 9).
    """
    # Read only the rows that are needed unless the full file is requested.
    n_rows = None if full else train + test
    # dtype=str keeps every grid as its digit string, so leading zeros can
    # never be lost to numeric parsing.
    table = pd.read_csv(path, nrows=n_rows, dtype=str).values

    # One row per puzzle and two columns -> two 1-D arrays of digit strings.
    q, sols = table.T

    def to_grids(flat_grids):
        # One digit per character, reshaped into a 9x9 grid per puzzle.
        return np.array(
            [np.reshape([int(d) for d in flat_grid], (9, 9)) for flat_grid in flat_grids])

    X = to_grids(q)
    y = to_grids(sols)

    return (X[:train], y[:train]), (X[train:], y[train:])


def diff(grids_true, grids_pred):
    """Count, for every grid of a batch, the cells where the two inputs differ.

    The arguments are compared element-wise and the mismatches are summed over
    axes 1 and 2, giving one error count per grid (0 means a perfect match).
    """
    mismatches = grids_true != grids_pred
    return mismatches.sum(axis=(1, 2))


def del_digits(X, delete=1):
    """Turn one-hot encoded grids into quizzes by blanking random cells.

    Parameters
    ----------
    X : np.ndarray
        One-hot grids of shape (n, 9, 9, n_classes); class 0 stands for a
        blank cell.
    delete : int
        Number of cell indices drawn per grid. They are drawn with
        replacement, so a grid can end up with fewer than `delete` new blanks.

    Returns
    -------
    np.ndarray
        One-hot encoding of the blanked grids, with the same number of classes
        as `X`.
    """
    grids = X.argmax(3)  # fresh (n, 9, 9) integer array, so X itself is untouched
    for grid in grids:
        # `grid` is a view on `grids`, so the blanks are written in place.
        grid.flat[np.random.randint(0, 81, delete)] = 0

    # Pin the class count to the input's: letting to_categorical infer it from
    # the largest remaining digit would make the output depth data-dependent.
    return to_categorical(grids, num_classes=X.shape[-1])


def batch_smart_solve(grids, solver):
    """Fill the blanks of a batch of sudoku grids, one cell per grid per round.

    Every round asks `solver` for predictions on the current grids and, for
    each grid that still has a blank (0), writes the predicted digit of the
    blank cell whose top probability is the highest. The number of rounds is
    the largest blank count found in the batch.

    Parameters
    ----------
    grids : np.ndarray
        Integer array of shape (n, 9, 9) where 0 marks a blank cell. It is
        copied, never modified.
    solver : object
        Must expose ``predict(x)`` which, given the one-hot grids of shape
        (n, 9, 9, 10), returns 81 arrays of shape (n, 9): one probability
        vector per cell in row-major order, index k standing for digit k + 1.

    Returns
    -------
    np.ndarray
        A copy of `grids` in which every blank has been filled.
    """
    grids = grids.copy()
    n_grids = grids.shape[0]
    if n_grids == 0:
        return grids  # nothing to solve; also avoids max() on an empty array

    # View on the copy: writes through `flat` land in `grids`.
    flat = grids.reshape(n_grids, 81)

    for _ in range((flat == 0).sum(1).max()):
        # Fix the class count so the encoding never depends on the digits
        # that happen to be present in the batch.
        pred = np.array(solver.predict(to_categorical(grids, num_classes=10)))
        probs = pred.max(2).T  # (n, 81) top probability of each cell
        values = pred.argmax(2).T + 1  # (n, 81) matching digit

        blanks = flat == 0
        # Already filled cells can never win the argmax.
        best = np.where(blanks, probs, -np.inf).argmax(1)
        # Leave completed grids alone.
        todo = np.flatnonzero(blanks.any(1))
        flat[todo, best[todo]] = values[todo, best[todo]]

    return grids

In [3]:
inp_shape = (9, 9, 10)
(_, y_train), (X_test, y_test) = loading_data()

# One-hot encode the network inputs over the classes 0-9 (0 = blank cell).
# Training inputs start from the solved grids; del_digits blanks cells later.
X_train = to_categorical(y_train, num_classes=inp_shape[-1]).astype('float32')
# Test inputs are the quizzes returned by loading_data. Encoding the solutions
# here would hand the solver grids without blanks and make the evaluation
# trivially perfect.
X_test = to_categorical(X_test, num_classes=inp_shape[-1]).astype('float32')

# Targets: digits 1-9 shifted to classes 0-8, one-hot encoded per cell.
y_train = to_categorical(y_train - 1, num_classes=9).astype('float32')
y_test = to_categorical(y_test - 1, num_classes=9).astype('float32')

In [4]:
# Model
#
# Shared trunk: two Dense + Dropout stages followed by a Flatten, so that all
# output heads read the same feature vector.
shared_trunk = Sequential([
    Dense(64, activation='relu', input_shape=inp_shape),
    Dropout(0.4),
    Dense(64, activation='relu'),
    Dropout(0.4),
    Flatten(),
])

# Functional wiring: input -> shared features
inp = Input(shape=inp_shape)
features = shared_trunk(inp)

# One 9-way softmax head per cell (81 heads in total)
out = [Dense(9, activation='softmax')(features) for _ in range(81)]

# Final multi-output model
model = Model(inp, out)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [5]:
# Warm-up pass on the complete grids (no digit removed).
# One target array per cell, in row-major order, to match the 81 output heads.
cell_targets = [y_train[:, row, col, :] for row in range(9) for col in range(9)]
model.fit(del_digits(X_train, 0), cell_targets, batch_size=128, epochs=1, verbose=1)



<keras.callbacks.History at 0x2cd6a549de0>

In [None]:
# Early stopping: end a pass once the monitored quantity (Keras' default) has
# not improved for 2 consecutive epochs.
es = EarlyStopping(patience=2, verbose=1)

# Curriculum: each pass trains for more epochs on grids with more blanked cells.
epochs_per_pass = [1, 2, 3, 4, 6, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
deleted_per_pass = [1, 2, 3, 4, 6, 8, 10, 12, 15, 20, 25, 30, 35, 40, 45, 50, 55]

# The targets do not depend on the pass: build the 81 per-cell arrays once.
cell_targets = [y_train[:, row, col, :] for row in range(9) for col in range(9)]

for pass_no, (ep, dele) in enumerate(zip(epochs_per_pass, deleted_per_pass), start=1):

    print('Pass n° {} ...'.format(pass_no))

    # NOTE(review): the validation quizzes are drawn from X_train as well, so
    # early stopping does not measure generalisation -- consider a held-out split.
    model.fit(del_digits(X_train, dele), cell_targets,
              validation_data=(del_digits(X_train, dele), cell_targets),
              batch_size=128, epochs=ep, verbose=1, callbacks=[es])

Pass n° 1 ...


Pass n° 2 ...
Epoch 1/2


Epoch 2/2


Pass n° 3 ...
Epoch 1/3


Epoch 2/3


Epoch 3/3


Pass n° 4 ...
Epoch 1/4


Epoch 2/4


Epoch 3/4


Epoch 4/4


Pass n° 5 ...
Epoch 1/6


Epoch 2/6


Epoch 3/6


Epoch 4/6


Epoch 5/6


Epoch 6/6


Pass n° 6 ...
Epoch 1/8


Epoch 2/8


Epoch 3/8


Epoch 4/8


Epoch 5/8


Epoch 5: early stopping
Pass n° 7 ...
Epoch 1/10


Epoch 2/10


Epoch 3/10


Epoch 4/10


Epoch 4: early stopping
Pass n° 8 ...
Epoch 1/10


Epoch 2/10


Epoch 3/10


Epoch 4/10


Epoch 4: early stopping
Pass n° 9 ...
Epoch 1/10


Epoch 2/10


Epoch 3/10


Epoch 4/10


Epoch 4: early stopping
Pass n° 10 ...
Epoch 1/10

In [None]:
# Evaluating the model
# Decode the one-hot test tensors back to 9x9 integer grids.
q = np.argmax(X_test, axis=3)
actual = np.argmax(y_test, axis=3) + 1  # classes 0-8 -> digits 1-9

# Let the model fill the grids, one most-confident blank at a time.
sg = batch_smart_solve(q, model)

# Number of wrong cells in each quiz.
de = diff(actual, sg)

# Fraction of quizzes solved without a single error.
acc = np.mean(de == 0)

In [None]:
# Summary: number of grids evaluated, number solved exactly, overall accuracy.
print("""Grid solved:\t {} Correct ones:\t {} Accuracy:\t {}""".format(len(de), (de == 0).sum(), acc))