In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

# Load the full Sudoku table from the Kaggle input mount. Downstream code
# indexes it positionally: column 0 is read as the puzzle string and column 1
# as the solution string (presumably '0' marks an empty cell - confirm against
# the dataset description).
data = pd.read_csv("/kaggle/input/d/rohanrao/sudoku/sudoku.csv")
# Bare last expression so the notebook renders a preview of the frame.
data

Unnamed: 0,puzzle,solution
0,0700000430400096108006349000940520003584600200...,6795182435437296188216349577943521863584617292...
1,3010865040465210705000000014008000020803479000...,3719865248465213795924738614638197522853479167...
2,0483015603600080909106700030200009355090102006...,7483915623652487919126754834217869355894132766...
3,0083170000042051090000400703271609049014500000...,2983176457642851391539462783271689549814537266...
4,0408906300001368208007405190004670524500207002...,1428956379751368248367425193984671524513287962...
...,...,...
8999995,2048090300004200800806300000910006083009041207...,2648197351374259865896372414913726583569841277...
8999996,8001003000005900045296401704000000212080190506...,8641723951735982645296431784578369212384196576...
8999997,0000003000008054604900630810070800301000592088...,2687143953718954624952637816571829341346592788...
8999998,0290061007030000608617020054300009010000100400...,5298461377431598628617324954376289512859137469...


## CNN

In [3]:
class SudokuDataset(Dataset):
    """Sudoku puzzles (and, for labelled data, solutions) as CNN-ready tensors.

    Args:
        dataframe: Frame whose column 0 holds the 81-character puzzle string.
            When ``subset == "train"`` column 1 must hold the 81-character
            solution string.
        subset: ``"train"`` yields ``(puzzle, solution)`` pairs; any other
            value yields the puzzle tensor only, so an unlabelled frame with a
            single column can be served.

    Items:
        puzzle: float32 tensor of shape ``(1, 9, 9)``; each digit ``d`` is
            scaled to ``d / 9 - 0.5``.
        solution: int64 tensor of shape ``(81,)`` holding ``digit - 1`` so the
            values can be used directly as class indices.
    """

    def __init__(self, dataframe, subset="train"):
        self.dataframe = dataframe
        self.subset = subset

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        puzzle_str = self.dataframe.iloc[idx, 0]
        puzzle = torch.tensor([int(p) for p in puzzle_str], dtype=torch.float32).reshape(1, 9, 9) / 9 - 0.5
        if self.subset != 'train':
            # Unlabelled data: never touch column 1, which may not exist.
            return puzzle

        solution_str = self.dataframe.iloc[idx, 1]
        # Subtract 1 from each solution digit so labels are 0-based class indices.
        solution = torch.tensor([int(s) - 1 for s in solution_str], dtype=torch.int64).reshape(81)
        return puzzle, solution
    
# Positional split: the first 98% of rows train the model, the rest validate it.
train_idx = int(0.98 * len(data))

# Both subsets are labelled, hence subset="train" for the validation set too.
train_data = SudokuDataset(data.iloc[:train_idx], subset="train")
val_data = SudokuDataset(data.iloc[train_idx:], subset="train")

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_data, batch_size=640, shuffle=True)
val_loader = DataLoader(val_data, batch_size=640, shuffle=False)

In [4]:
import torch.nn as nn
import torch.nn.functional as F

class SudokuSolver(nn.Module):
    """Small CNN that scores all nine digits for each of the 81 cells.

    The network takes a ``(batch, 1, 9, 9)`` tensor and returns logits of
    shape ``(batch * 81, 9)``: one row per cell, one column per digit class.
    """

    def __init__(self):
        super().__init__()
        # Two padded 3x3 conv stages (spatial size stays 9x9), each with batch norm.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=64)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=128)
        # 1x1 conv mixes channels per cell; it has no batch norm after it.
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=1)
        # Dense head over the flattened feature map: 81 cells x 9 digit scores.
        self.fc1 = nn.Linear(in_features=128 * 9 * 9, out_features=81 * 9)

    def forward(self, x):
        batch_size = x.size(0)

        features = self.conv1(x)
        features = F.relu(self.bn1(features))
        features = self.conv2(features)
        features = F.relu(self.bn2(features))
        features = F.relu(self.conv3(features))

        flat = features.view(batch_size, -1)
        logits = self.fc1(flat)
        # One row per cell so the result lines up with flattened labels.
        return logits.view(-1, 9)

# Instantiate the CNN with freshly initialised weights (no seed is set here,
# so the initial weights differ from run to run).
model = SudokuSolver()

In [5]:
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# nn.Module.to moves the parameters in place and returns the module itself.
model = model.to(device)

# Per-cell 9-way classification loss and the optimiser for the CNN weights.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [6]:
def calculate_accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        outputs: 2-D score tensor, one row per sample and one column per class.
        labels: 1-D tensor of class indices, one per row of ``outputs``.

    Returns:
        Python float in ``[0, 1]``.
    """
    predicted = outputs.argmax(dim=1)
    num_correct = predicted.eq(labels).sum().item()
    return num_correct / labels.size(0)

def _run_cnn_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, cell_accuracy)``.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. Both metrics are averaged over individual cells, so a short
    final batch is weighted correctly (this assumes ``criterion`` uses mean
    reduction, as ``nn.CrossEntropyLoss()`` does by default). An empty loader
    yields ``(nan, nan)`` instead of dividing by zero.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct_cells = 0
    total_cells = 0

    with torch.set_grad_enabled(training):
        for puzzles, solutions in loader:
            puzzles = puzzles.to(device)
            targets = solutions.to(device).reshape(-1)

            outputs = model(puzzles)
            loss = criterion(outputs, targets)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            cells = targets.numel()
            loss_sum += loss.item() * cells
            correct_cells += (outputs.argmax(dim=1) == targets).sum().item()
            total_cells += cells

    if total_cells == 0:
        return float("nan"), float("nan")
    return loss_sum / total_cells, correct_cells / total_cells


def train_model(model, train_loader, val_loader, epochs, criterion=None, optimizer=None, device=None):
    """Train ``model`` for ``epochs`` epochs, printing train/validation metrics.

    Args:
        model: Module mapping a puzzle batch to logits of shape
            ``(num_cells_in_batch, num_classes)``.
        train_loader: Iterable of ``(puzzles, solutions)`` batches used for updates.
        val_loader: Iterable of ``(puzzles, solutions)`` batches used for evaluation.
        epochs: Number of passes over ``train_loader``.
        criterion: Loss callable. Defaults to the module-level ``criterion``.
        optimizer: Optimiser bound to ``model``'s parameters. Defaults to the
            module-level ``optimizer``.
        device: Device the batches are moved to. Defaults to the module-level
            ``device``.

    Passing ``criterion``/``optimizer``/``device`` explicitly is recommended:
    the module-level fallbacks are looked up at call time, so if those names
    are rebound for another model in between, this function would otherwise
    step an optimiser that does not own ``model``'s parameters.

    Returns:
        None. The model is left in eval mode after each epoch's validation pass.
    """
    notebook_globals = globals()
    if criterion is None:
        criterion = notebook_globals["criterion"]
    if optimizer is None:
        optimizer = notebook_globals["optimizer"]
    if device is None:
        device = notebook_globals["device"]

    for epoch in range(epochs):
        avg_train_loss, train_accuracy = _run_cnn_epoch(model, train_loader, criterion, device, optimizer)
        avg_val_loss, val_accuracy = _run_cnn_epoch(model, val_loader, criterion, device)

        print(f'Epoch {epoch + 1}: '
              f'Train Loss: {avg_train_loss:.4f}, '
              f'Train Accuracy: {train_accuracy:.4f}, '
              f'Validation Loss: {avg_val_loss:.4f}, '
              f'Validation Accuracy: {val_accuracy:.4f}')

In [7]:
# Train the CNN for 5 epochs; train_model prints one metrics line per epoch.
train_model(model, train_loader, val_loader, 5)

Epoch 1: Train Loss: 0.4286, Train Accuracy: 0.8022, Validation Loss: 0.3801, Validation Accuracy: 0.8195
Epoch 2: Train Loss: 0.3751, Train Accuracy: 0.8228, Validation Loss: 0.3727, Validation Accuracy: 0.8245
Epoch 3: Train Loss: 0.3695, Train Accuracy: 0.8267, Validation Loss: 0.3694, Validation Accuracy: 0.8269
Epoch 4: Train Loss: 0.3666, Train Accuracy: 0.8287, Validation Loss: 0.3674, Validation Accuracy: 0.8281
Epoch 5: Train Loss: 0.3645, Train Accuracy: 0.8300, Validation Loss: 0.3658, Validation Accuracy: 0.8293


## Testing with CNN

In [8]:
import pandas as pd
# Load the unlabelled test puzzles; the prediction helpers below read the
# puzzle strings from the 'test' column.
test=pd.read_csv("/kaggle/input/sudoku/testing_df.csv")
# Bare last expression so the notebook renders a preview of the frame.
test

Unnamed: 0,test
0,9027008137614000250800000706070950009106000542...
1,0708010200207300001060920040900754800520080038...
2,0009001650806000306012437008070950230307046009...
3,1580096070000760027264183000157040000040800510...
4,0020500096038002059700008361080005032060350800...
...,...
9995,0001050300407062100009405607510943060202507040...
9996,1058000404001300056030509170006005315060030092...
9997,0000278000703000900487500367800000040524009783...
9998,0020950000391800001050020980017200803275009164...


In [9]:
def preprocess_puzzle(puzzle_str):
    """
    Turn one 81-character puzzle string into a single-item CNN input batch.

    Each digit ``d`` is scaled to ``d / 9 - 0.5`` and the result is shaped
    ``(1, 1, 9, 9)`` and moved to the module-level ``device``.
    """
    digits = [int(ch) for ch in puzzle_str]
    grid = torch.tensor(digits, dtype=torch.float32).reshape(1, 1, 9, 9)
    scaled = grid / 9 - 0.5
    return scaled.to(device)

def predict_test_set(model, test_df):
    """
    Predict a solved grid for every puzzle in ``test_df['test']``.

    The model is switched to eval mode and run one puzzle at a time with
    gradients disabled. Returns a list of 9x9 NumPy arrays holding the
    predicted digits (1-9), in the same order as the rows of ``test_df``.
    """
    model.eval()
    predictions = []

    with torch.no_grad():
        for puzzle_str in test_df['test']:
            cell_logits = model(preprocess_puzzle(puzzle_str))
            # argmax yields classes 0-8; add 1 to get the digits 1-9.
            grid = cell_logits.argmax(dim=1).view(9, 9) + 1
            predictions.append(grid.cpu().numpy())

    return predictions

In [10]:
# predict on the entire test set
predicted_solutions = predict_test_set(model, test)

In [11]:
# Flatten every predicted grid into submission rows: the id is
# "<puzzle number>_<row><col>" and the label is the predicted digit.
indexes = []
labels = []
for puzzle_no, grid in enumerate(predicted_solutions):
    cells = grid.reshape(9, 9)
    cell_coords = [(r, c) for r in range(9) for c in range(9)]
    indexes.extend(str(puzzle_no) + "_" + str(r) + str(c) for r, c in cell_coords)
    labels.extend(cells[r, c] for r, c in cell_coords)

In [12]:
# Assemble the submission table directly, one row per predicted cell.
CNN_Test_Predictions = pd.DataFrame({"id": indexes, "values": labels})
CNN_Test_Predictions

Unnamed: 0,id,values
0,0_00,9
1,0_01,4
2,0_02,2
3,0_03,7
4,0_04,5
...,...,...
809995,9999_84,1
809996,9999_85,8
809997,9999_86,3
809998,9999_87,9


In [13]:
# Write the CNN submission to the working directory; index=False keeps the
# DataFrame's row index out of the file so only `id` and `values` are saved.
CNN_Test_Predictions.to_csv("CNN_Test_Predictions.csv", index=False)

## LSTM

In [14]:
class SudokuDataset(Dataset):
    """Sudoku puzzles (and, for labelled data, solutions) as flat sequences.

    NOTE: this rebinds the name ``SudokuDataset``. Dataset/loader objects that
    were built before this definition keep using the class they were created
    with; only datasets constructed afterwards use this one.

    Args:
        dataframe: Frame whose column 0 holds the 81-character puzzle string.
            When ``subset == "train"`` column 1 must hold the 81-character
            solution string.
        subset: ``"train"`` yields ``(puzzle, solution)`` pairs; any other
            value yields the puzzle tensor only, so an unlabelled frame with a
            single column can be served.

    Items:
        puzzle: float32 tensor of shape ``(81,)``; each digit ``d`` is scaled
            to ``d / 9 - 0.5``.
        solution: int64 tensor of shape ``(81,)`` holding ``digit - 1`` so the
            values can be used directly as class indices.
    """

    def __init__(self, dataframe, subset="train"):
        self.dataframe = dataframe
        self.subset = subset

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        # Puzzle string -> list of ints -> scaled float tensor.
        puzzle_str = self.dataframe.iloc[idx, 0]
        puzzle = [int(p) for p in puzzle_str]
        puzzle_tensor = torch.tensor(puzzle, dtype=torch.float32) / 9.0 - 0.5
        if self.subset != "train":
            # Unlabelled data: never touch column 1, which may not exist.
            return puzzle_tensor

        # Solution string -> list of ints -> 0-based class indices.
        solution_str = self.dataframe.iloc[idx, 1]
        solution = [int(s) for s in solution_str]
        solution_tensor = torch.tensor(solution, dtype=torch.int64) - 1
        return puzzle_tensor, solution_tensor

In [15]:
class SudokuSolverLSTM(nn.Module):
    """Three stacked bidirectional LSTM blocks followed by a per-cell classifier.

    Args:
        input_size: Number of features per sequence step.
        hidden_size: Hidden width of each LSTM direction.
        num_layers: Number of layers inside each of the three LSTM blocks.
        dropout: Dropout passed to each ``nn.LSTM`` block.

    ``forward`` accepts any tensor whose first dimension is the batch; the
    remaining elements are reshaped to ``(batch, seq_len, input_size)``. It
    returns logits of shape ``(batch, seq_len, 9)``, i.e. ``(batch, 81, 9)``
    for a standard 81-cell puzzle.
    """

    def __init__(self, input_size=1, hidden_size=128, num_layers=2, dropout=0.25):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout, bidirectional=True)
        # Later blocks consume the concatenated forward/backward states (2 * hidden_size).
        self.lstm2 = nn.LSTM(2*hidden_size, hidden_size, num_layers, batch_first=True, dropout=dropout, bidirectional=True)
        self.lstm3 = nn.LSTM(2*hidden_size, hidden_size, num_layers, batch_first=True, dropout=dropout, bidirectional=True)
        self.fc = nn.Linear(2*hidden_size, 9)  # Output: 9 possible digits

    def forward(self, x):
        # Flatten everything after the batch dimension into a sequence of steps.
        sequence = x.reshape(x.size(0), -1, self.input_size)

        # No explicit (h0, c0): nn.LSTM defaults both to zeros that match the
        # input's device and dtype, which is what the hand-built states did.
        out, _ = self.lstm1(sequence)
        out, _ = self.lstm2(out)
        out, _ = self.lstm3(out)

        # nn.Linear acts on the last dimension, so the sequence length is kept
        # as-is rather than being hard-coded to 81.
        return self.fc(out)

In [16]:
def _run_lstm_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, cell_accuracy)``.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. The model always receives the batch exactly as the loader
    produced it, in both modes. Both metrics are averaged over individual
    cells, so a short final batch is weighted correctly (this assumes
    ``criterion`` uses mean reduction, as ``nn.CrossEntropyLoss()`` does by
    default). An empty loader yields ``(nan, nan)`` instead of dividing by zero.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct_cells = 0
    total_cells = 0

    with torch.set_grad_enabled(training):
        for puzzles, solutions in loader:
            puzzles = puzzles.to(device)
            targets = solutions.to(device).reshape(-1)

            # Collapse (batch, seq_len, 9) logits to one row per cell.
            outputs = model(puzzles).reshape(-1, 9)
            loss = criterion(outputs, targets)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            cells = targets.numel()
            loss_sum += loss.item() * cells
            correct_cells += (outputs.argmax(dim=-1) == targets).sum().item()
            total_cells += cells

    if total_cells == 0:
        return float("nan"), float("nan")
    return loss_sum / total_cells, correct_cells / total_cells


def train_model_lstm(model, train_loader, val_loader, epochs, criterion, optimizer, device):
    """Train a sequence model for ``epochs`` epochs, printing metrics per epoch.

    Args:
        model: Module whose output can be reshaped to ``(num_cells, 9)`` logits.
        train_loader: Iterable of ``(puzzles, solutions)`` batches used for updates.
        val_loader: Iterable of ``(puzzles, solutions)`` batches used for evaluation.
        epochs: Number of passes over ``train_loader``.
        criterion: Loss callable taking ``(logits, targets)``.
        optimizer: Optimiser bound to ``model``'s parameters.
        device: Device the batches are moved to.

    Returns:
        None. The model is left in eval mode after each epoch's validation pass.
    """
    for epoch in range(epochs):
        avg_train_loss, train_accuracy = _run_lstm_epoch(model, train_loader, criterion, device, optimizer)
        avg_val_loss, val_accuracy = _run_lstm_epoch(model, val_loader, criterion, device)

        print(f'Epoch {epoch + 1}: '
              f'Train Loss: {avg_train_loss:.4f}, '
              f'Train Accuracy: {train_accuracy:.4f}, '
              f'Validation Loss: {avg_val_loss:.4f}, '
              f'Validation Accuracy: {val_accuracy:.4f}')

In [17]:
# Build the LSTM model on the selected device.
model_lstm = SudokuSolverLSTM()
model_lstm = model_lstm.to(device)

# NOTE: these rebind the module-level `criterion` and `optimizer` names that
# were previously created for the CNN; from here on they belong to the LSTM.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_lstm.parameters(), lr=0.001)

In [18]:
# Train the LSTM for 5 epochs on the loaders built earlier in the notebook;
# one metrics line is printed per epoch.
train_model_lstm(model_lstm, train_loader, val_loader, 5, criterion, optimizer, device)

Epoch 1: Train Loss: 1.2502, Train Accuracy: 0.5041, Validation Loss: 1.0245, Validation Accuracy: 0.5460
Epoch 2: Train Loss: 1.1602, Train Accuracy: 0.5232, Validation Loss: 1.0211, Validation Accuracy: 0.5437
Epoch 3: Train Loss: 0.9050, Train Accuracy: 0.5942, Validation Loss: 0.7654, Validation Accuracy: 0.6479
Epoch 4: Train Loss: 1.0482, Train Accuracy: 0.5563, Validation Loss: 1.3902, Validation Accuracy: 0.4726
Epoch 5: Train Loss: 0.8860, Train Accuracy: 0.6137, Validation Loss: 0.7167, Validation Accuracy: 0.6657


## Testing with LSTM

In [19]:
def preprocess_puzzle_lstm(puzzle_str):
    """
    Turn one puzzle string into a single-item sequence batch for the LSTM.

    Each digit ``d`` is scaled to ``d / 9 - 0.5``; the result is shaped
    ``(1, sequence_length, 1)`` and moved to the module-level ``device``.
    """
    digits = [int(ch) for ch in puzzle_str]
    scaled = torch.tensor(digits, dtype=torch.float32) / 9.0 - 0.5
    # (batch_size=1, sequence_length, input_size=1)
    return scaled.view(1, -1, 1).to(device)

def predict_test_set_lstm(model, test_df):
    """
    Predict a solved grid for every puzzle in ``test_df['test']`` with the LSTM.

    The model is switched to eval mode and run one puzzle at a time with
    gradients disabled. Returns a list of 9x9 NumPy arrays holding the
    predicted digits (1-9), in the same order as the rows of ``test_df``.
    """
    model.eval()
    predictions = []

    with torch.no_grad():
        for puzzle_str in test_df['test']:
            sequence_logits = model(preprocess_puzzle_lstm(puzzle_str))
            # Class scores sit on the last axis; argmax yields 0-8, so add 1
            # for digits 1-9 and fold the 81-step sequence back into a grid.
            grid = sequence_logits.argmax(dim=2).view(9, 9) + 1
            predictions.append(grid.cpu().numpy())

    return predictions

# Predict on the entire test set using the LSTM model
predicted_solutions_lstm = predict_test_set_lstm(model_lstm, test)

# submission
indexes = []
labels = []
for i, solution in enumerate(predicted_solutions_lstm):
    for row in range(9):
        for col in range(9):
            index = f"{i}_{row}{col}"
            indexes.append(index)
            labels.append(solution[row, col])

# Create the DataFrame
Test_Predictions_lstm = pd.DataFrame({
    "id": indexes,
    "values": labels
})

#predicted df
Test_Predictions_lstm

Unnamed: 0,id,values
0,0_00,9
1,0_01,4
2,0_02,2
3,0_03,7
4,0_04,5
...,...,...
809995,9999_84,5
809996,9999_85,1
809997,9999_86,3
809998,9999_87,9


In [20]:
# Write the LSTM submission to the working directory; index=False keeps the
# DataFrame's row index out of the file so only `id` and `values` are saved.
Test_Predictions_lstm.to_csv("LSTM_Test_Predictions.csv", index=False)