In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Import Libraries

In [None]:
# imutils supplies the `perspective` helper imported in the next cell.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install --quiet imutils

In [None]:
import os
# Set ahead of the TensorFlow import further down in this cell.
# NOTE(review): presumably level "3" silences TensorFlow's native log output — confirm against the TF docs.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy as np
import pandas as pd

import pytesseract
import matplotlib.pyplot as plt
import cv2
from imutils import perspective
from PIL import Image

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, losses
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split

# Load Data

In [None]:
# Read the Sudoku table from the Kaggle input directory and preview its first rows.
sudoku_csv_path = "/kaggle/input/sudoku/sudoku.csv"
df = pd.read_csv(sudoku_csv_path)
df.head()

In [None]:
def _digits_to_array(column):
    """Turn a column of digit strings into a 2-D integer array, one row per string."""
    return np.array([[int(char) for char in text] for text in column])


# Inputs keep their raw digits and gain a trailing channel axis.
X = _digits_to_array(df['quizzes']).reshape(-1, 9, 9, 1)
# Targets are shifted down by one so each solution digit becomes a class index starting at 0.
Y = _digits_to_array(df['solutions']).reshape(-1, 9, 9) - 1

In [None]:
# Hold out 10% of the samples as a test set; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

# Model

In [None]:
# The input shape is taken from the training data (everything after the batch axis).
input_layer = layers.Input(shape=X_train.shape[1:])

# Three 3x3 "same"-padded ReLU convolution stages with growing filter counts,
# each followed by batch normalisation.
features = input_layer
for n_filters in (32, 64, 128):
    features = layers.Conv2D(n_filters, (3, 3), activation="relu", padding="same")(features)
    features = layers.BatchNormalization()(features)

# A 1x1 convolution reduces to 9 channels before flattening for the dense head.
features = layers.Conv2D(9, 1, activation="relu", padding="same")(features)
features = layers.Flatten()(features)

# Two dense layers with no activation; the second emits 81 * 9 values.
features = layers.Dense(512)(features)
features = layers.Dense(81 * 9)(features)

# Regroup the outputs into a 9x9 grid of 9 scores and normalise them with softmax.
features = layers.Reshape((9, 9, 9))(features)
activation_layer = layers.Activation("softmax")(features)

model = models.Model(input_layer, activation_layer)

model.summary()

In [None]:
# Configure training: sparse categorical cross-entropy, RMSprop, accuracy as the tracked metric.
compile_options = {
    "loss": "sparse_categorical_crossentropy",
    "optimizer": "rmsprop",
    "metrics": ["accuracy"],
}
model.compile(**compile_options)

In [None]:
# Draw the model's layer graph with layer names and tensor shapes shown.
plot_model(model, show_layer_names=True, show_shapes=True)

# Train

In [None]:
# Callback that watches val_loss with a patience of 3 epochs.
early_stopping = EarlyStopping(monitor="val_loss", patience=3)

In [None]:
# Train for up to 25 epochs, holding back 10% of the training data for validation
# and letting the early-stopping callback end the run.
fit_options = dict(
    batch_size=64,
    epochs=25,
    validation_split=0.1,
    callbacks=[early_stopping],
)
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Score the trained model on the held-out test split and report both numbers.
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_acc}")

# Results

In [None]:
# Training vs. validation accuracy per epoch, on a fixed 0-1 scale.
fig, ax = plt.subplots()
ax.plot(history.history["accuracy"], label="train")
ax.plot(history.history["val_accuracy"], label="valid")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.set_title("Accuracy Curve")
ax.set_ylim(0, 1)
ax.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.figure()
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["train", "valid"])
plt.title("Loss Curve")
# Anchor the axis at 0 but leave the top open: cross-entropy is not bounded by 1,
# so a fixed [0, 1] window (as used for accuracy) can clip the curve.
plt.ylim(bottom=0)
plt.show()

# Test

In [None]:
# Path of the puzzle image file that the following cells read with OpenCV.
image_path = "/kaggle/input/d/ahmedashrafahmed/sudoku/sudoku.jpg"

In [None]:
# Load the puzzle image and convert OpenCV's BGR channel order to RGB.
image = cv2.imread(image_path)
# cv2.imread signals a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [None]:
# Re-read the file, convert it to grayscale, then binarise it with an
# inverted Otsu threshold.
binarize_flags = cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
gray = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2GRAY)
ret, binary_image = cv2.threshold(gray, 0, 255, binarize_flags)

In [None]:
# Find contours in the binary image using the RETR_EXTERNAL retrieval mode
# and CHAIN_APPROX_SIMPLE point compression.
contours, hierarchy = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

In [None]:
# NOTE(review): index 96 is hard-coded and was presumably picked by hand for this
# particular image — confirm it still selects the puzzle outline if the input changes.
grid_contour = contours[96]

# Simplify the closed contour with a tolerance of 0.2% of its perimeter.
length = cv2.arcLength(grid_contour, True)
approx = cv2.approxPolyDP(grid_contour, 0.002 * length, True)

In [None]:
# Perspective-correct the RGB image using the points of the simplified contour.
# NOTE(review): presumably requires `approx` to hold exactly four corner points — confirm.
wrapped = perspective.four_point_transform(image, approx.squeeze())

In [None]:
# Binarise the rectified puzzle: grayscale first, then an inverted Otsu threshold.
wrapped_gray = cv2.cvtColor(wrapped, cv2.COLOR_RGB2GRAY)
ret, wrapped_binary = cv2.threshold(
    wrapped_gray,
    0,
    255,
    cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU,
)

In [None]:
# Resize to 450x450 so each side divides evenly into 9 cells of 50 pixels.
wrapped_binary = cv2.resize(wrapped_binary, (450, 450))

In [None]:
def split_cells(image, grid_size=9, margin=5):
    """Cut a puzzle image into its individual cell images.

    The image is divided into ``grid_size`` equal horizontal strips and each
    strip into ``grid_size`` equal tiles, walking the grid row by row. The
    first ``margin`` rows and columns of every tile are dropped (only the top
    and left edges are trimmed) before the tile is wrapped in a PIL image.

    Args:
        image: Array whose height and width are both divisible by
            ``grid_size``; ``np.vsplit``/``np.hsplit`` raise ``ValueError``
            otherwise.
        grid_size: Number of cells along each side of the grid.
        margin: Number of pixels removed from the top and left of each cell.

    Returns:
        A list of ``grid_size * grid_size`` PIL images in row-major order.
    """
    cells = []
    for strip in np.vsplit(image, grid_size):
        for tile in np.hsplit(strip, grid_size):
            # Open-ended slices keep the whole remainder of the tile, so cells
            # larger than 50 px are no longer cut short by a fixed upper bound.
            cells.append(Image.fromarray(tile[margin:, margin:]))

    return cells

In [None]:
# Split the resized binary puzzle image into its individual cell images.
sudoku_cells = split_cells(wrapped_binary)

In [None]:
# Show every extracted cell in a 9x9 grid of subplots, filled row by row.
fig, axes = plt.subplots(nrows=9, ncols=9, figsize=(20, 20))
for position, cell_image in enumerate(sudoku_cells):
    grid_row, grid_col = divmod(position, 9)
    cell_axis = axes[grid_row][grid_col]
    cell_axis.imshow(cell_image, cmap="gray")
    cell_axis.axis("off")
plt.show()

In [None]:
# OCR every cell image with Tesseract, restricted to the characters 1-9.
sudoku = []

for sudoku_cell in sudoku_cells:
    number_text = pytesseract.image_to_string(sudoku_cell, config="--psm 10 --oem 3 -c tessedit_char_whitelist=123456789")
    try:
        sudoku.append(int(number_text))
    except ValueError:
        # The OCR text is not a parsable integer (e.g. empty for a blank cell):
        # record 0. Only ValueError is caught so that interrupts and genuine
        # failures are no longer silently swallowed.
        sudoku.append(0)

In [None]:
# Convert the Python list of recognised values into a NumPy array (rebinding the same name).
sudoku = np.array(sudoku)

In [None]:
# Arrange the flat OCR results as a 9x9 board and display it.
# reshape may return a view, so later edits to test_sudoku can also show up in `sudoku`.
test_sudoku = sudoku.reshape(9, 9)
test_sudoku

In [None]:
# Hand-entered overrides for individual cells, keyed by (row, column).
# A value of 0 marks the cell as empty.
# NOTE(review): (2, 8) and (4, 8) both set 7 in column 8, which would break the
# Sudoku column rule — verify these two entries against the source image.
manual_corrections = {
    (0, 8): 9,
    (1, 5): 7,
    (2, 2): 4, (2, 8): 7,
    (3, 3): 7, (3, 5): 1, (3, 7): 2,
    (4, 2): 8, (4, 8): 7,
    (5, 1): 1, (5, 3): 5, (5, 5): 2, (5, 6): 0,
    (6, 2): 7, (6, 4): 4,
    (7, 3): 3, (7, 5): 9, (7, 7): 4,
    (8, 4): 5, (8, 8): 8,
}

for (fix_row, fix_col), fix_value in manual_corrections.items():
    test_sudoku[fix_row][fix_col] = fix_value

In [None]:
# Run the board through the network as a single-sample batch with a channel axis,
# take the highest-scoring class per cell, and shift back by one so classes map to digits.
puzzle_batch = test_sudoku.reshape(1, 9, 9, 1)
class_scores = model.predict(puzzle_batch)
result = class_scores.argmax(-1).squeeze() + 1
result

In [None]:
def isValidSet(nums):
    """Return True when no non-zero value appears more than once in ``nums``.

    Zeros are ignored, so any number of them is allowed.
    """
    filled = [value for value in nums if value != 0]
    # A duplicate collapses inside the set, making it shorter than the list.
    return len(set(filled)) == len(filled)

In [None]:
def isValidSudoku(board):
    """Check a 9x9 board for repeated values using ``isValidSet``.

    Every row, every column and every 3x3 box is passed to ``isValidSet``;
    the board is accepted only if all of them pass.

    Args:
        board: Sequence of rows, indexable as ``board[r][c]``.

    Returns:
        True if no row, column or box fails the check, otherwise False.
    """
    # Rows first, then columns (zip(*board) yields the columns).
    if not all(isValidSet(row) for row in board):
        return False
    if not all(isValidSet(column) for column in zip(*board)):
        return False

    # Finally each 3x3 box, addressed by its top-left corner.
    for top in (0, 3, 6):
        for left in (0, 3, 6):
            box = [
                board[r][c]
                for r in range(top, top + 3)
                for c in range(left, left + 3)
            ]
            if not isValidSet(box):
                return False

    return True

In [None]:
# Hard-coded 9x9 grid run through the validator; the result is the cell's output.
candidate_board = [
    [8, 7, 3, 4, 1, 5, 4, 6, 9],
    [9, 5, 9, 8, 3, 7, 4, 1, 4],
    [1, 3, 4, 2, 9, 5, 8, 8, 2],
    [4, 6, 3, 7, 8, 1, 5, 2, 5],
    [5, 9, 8, 9, 6, 3, 1, 3, 7],
    [7, 1, 3, 5, 8, 2, 8, 9, 6],
    [1, 9, 7, 1, 4, 8, 6, 3, 3],
    [6, 8, 5, 3, 7, 9, 5, 4, 5],
    [3, 4, 6, 1, 5, 6, 9, 7, 8],
]
isValidSudoku(candidate_board)