# Data
Import and prepare initial dataset

In [61]:
import torch
import numpy as np
import pandas as pd

In [112]:
#Sudokus to load
load_number = 1000


quizzes = np.zeros((load_number+1, 81), np.int32)
solutions = np.zeros((load_number+1, 81), np.int32)
for i, line in enumerate(open('sudoku.csv', 'r').read().splitlines()[1:]):
    quiz, solution = line.split(",")
    for j, q_s in enumerate(zip(quiz, solution)):
        q, s = q_s
        quizzes[i, j] = q
        solutions[i, j] = s
    if i == load_number:
        break
quizzes = quizzes.reshape((-1, 9, 9))
solutions = solutions.reshape((-1, 9, 9))

In [113]:
dataset = {}
dataset['quizzes'] = quizzes
dataset['solutions'] = solutions

In [114]:
from torch.utils.data import Dataset

In [115]:
class SudokuDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, sudoku_list, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.sudoku_list = sudoku_list
        self.transform = transform

    def __len__(self):
        return len(self.sudoku_list['quizzes'])

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        quiz = self.sudoku_list['quizzes'][idx]
        solution = self.sudoku_list['solutions'][idx]
        sample = {'quizzes': quiz, 'solutions': solution}

        if self.transform:
            sample = self.transform(sample)

        return sample

In [116]:
dataset = SudokuDataset(dataset)

# Configurations

In [166]:
batch_size = 2
shuffle = True

# Dataloader 

In [167]:
from torch.utils.data import DataLoader

In [168]:
#Dataloader
train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=  batch_size,
        shuffle=  True,
        drop_last=False)

train_iterator = train_loader.__iter__()

In [169]:
batch = train_iterator.next()

In [170]:
batch['quizzes'].shape

torch.Size([2, 9, 9])

# Model

In [236]:
import torch.nn as nn
import torch.nn.functional as F

In [337]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        
        self.placement_net = nn.Sequential(
            nn.Linear( 81, 81),
            nn.ReLU(inplace=True),
            nn.Linear( 81, 81),
            nn.ReLU(inplace=True),
            nn.Linear( 81, 81),
            nn.ReLU(inplace=True),
            nn.Linear( 81, 81),
            nn.Softmax(dim=1)
        )
        
        self.number_net = nn.Sequential(
            nn.Linear( 82, 81),
            nn.ReLU(inplace=True),
            nn.Linear( 81, 81),
            nn.ReLU(inplace=True),
            nn.Linear( 81, 81),
            nn.ReLU(inplace=True),
            nn.Linear( 81, 9),
            nn.Softmax(dim=1)
        )

    def forward(self, batch):
        quiz = batch['quizzes'] # Dim B x 9 x 9
        B = quiz.shape[0]
        quiz = quiz.reshape(B, 81) # Dim B x 81
        placement_prob = self.placement_net( quiz.float() )
        placement_values , placement_guess = torch.max(placement_prob, 1)
        
        placement =  placement_values.reshape(-1,1)       
        number_prob = self.number_net( torch.cat((quiz , placement.int()), dim=1 ).float() )
        number_values , number_guess = torch.max(number_prob, 1)
        
        return placement_values, placement_guess, placement_prob, number_values, number_guess

In [338]:
model = Model()

In [339]:
model.forward(batch)

(tensor([0.0161, 0.0165], grad_fn=<MaxBackward0>),
 tensor([13,  3]),
 tensor([[0.0115, 0.0135, 0.0124, 0.0156, 0.0107, 0.0116, 0.0131, 0.0124, 0.0106,
          0.0125, 0.0120, 0.0118, 0.0127, 0.0161, 0.0113, 0.0128, 0.0134, 0.0108,
          0.0099, 0.0108, 0.0112, 0.0124, 0.0133, 0.0104, 0.0127, 0.0121, 0.0114,
          0.0117, 0.0138, 0.0110, 0.0124, 0.0130, 0.0123, 0.0144, 0.0133, 0.0120,
          0.0133, 0.0108, 0.0127, 0.0130, 0.0145, 0.0131, 0.0139, 0.0125, 0.0117,
          0.0124, 0.0118, 0.0116, 0.0130, 0.0151, 0.0114, 0.0134, 0.0113, 0.0114,
          0.0141, 0.0126, 0.0129, 0.0142, 0.0144, 0.0143, 0.0126, 0.0103, 0.0128,
          0.0134, 0.0146, 0.0132, 0.0113, 0.0103, 0.0112, 0.0138, 0.0103, 0.0099,
          0.0119, 0.0112, 0.0134, 0.0116, 0.0113, 0.0115, 0.0095, 0.0126, 0.0111],
         [0.0131, 0.0124, 0.0139, 0.0165, 0.0108, 0.0123, 0.0137, 0.0131, 0.0096,
          0.0127, 0.0112, 0.0100, 0.0112, 0.0159, 0.0133, 0.0134, 0.0149, 0.0109,
          0.0112, 0.0118, 0

# Loss

In [397]:
def loss_function(prediction, solution):
    loss_func = nn.NLLLoss()
    loss = loss_func(prediction, solution)
    
    return loss

In [398]:
prediction = batch['quizzes'].reshape(2,-1)
solution = batch['solutions'].reshape(2,-1)

In [399]:
loss_function(quiz.float(),solution.float())

ValueError: Expected input batch_size (2) to match target batch_size (162).

In [387]:
print(prediction)
print(solution)

tensor([[0, 3, 0, 4, 0, 5, 0, 0, 2, 0, 0, 9, 0, 0, 7, 0, 4, 0, 0, 0, 6, 0, 8, 0,
         1, 0, 0, 2, 0, 0, 1, 0, 3, 0, 6, 5, 4, 0, 0, 9, 2, 0, 0, 0, 0, 1, 7, 0,
         0, 0, 6, 0, 0, 8, 0, 5, 0, 0, 0, 4, 7, 9, 1, 3, 1, 0, 0, 0, 0, 8, 0, 0,
         0, 8, 0, 7, 0, 0, 2, 0, 6],
        [4, 9, 0, 1, 0, 7, 3, 0, 0, 0, 8, 7, 0, 0, 2, 0, 0, 9, 0, 0, 0, 0, 5, 0,
         0, 6, 0, 1, 0, 0, 6, 0, 0, 8, 0, 2, 5, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 9,
         0, 4, 0, 0, 7, 0, 0, 5, 0, 7, 0, 0, 0, 4, 3, 8, 0, 6, 0, 2, 0, 9, 1, 0,
         0, 0, 2, 0, 1, 4, 0, 0, 5]], dtype=torch.int32)
tensor([[7, 3, 1, 4, 9, 5, 6, 8, 2, 8, 2, 9, 6, 1, 7, 5, 4, 3, 5, 4, 6, 3, 8, 2,
         1, 7, 9, 2, 9, 8, 1, 7, 3, 4, 6, 5, 4, 6, 5, 9, 2, 8, 3, 1, 7, 1, 7, 3,
         5, 4, 6, 9, 2, 8, 6, 5, 2, 8, 3, 4, 7, 9, 1, 3, 1, 7, 2, 6, 9, 8, 5, 4,
         9, 8, 4, 7, 5, 1, 2, 3, 6],
        [4, 9, 5, 1, 6, 7, 3, 2, 8, 6, 8, 7, 4, 3, 2, 1, 5, 9, 2, 1, 3, 8, 5, 9,
         7, 6, 4, 1, 7, 4, 6, 9, 5, 8, 3, 2, 5, 2, 8, 3, 7,

In [388]:
torch.randn(3, 5, requires_grad=True)

tensor([[-0.3228, -0.6595, -0.1534,  0.3854,  2.3721],
        [ 0.6186,  0.6380, -1.2228, -2.0176, -0.1752],
        [ 0.3899, -1.1809, -0.0346,  2.2423,  0.3145]], requires_grad=True)

In [391]:
torch.empty(3, dtype=torch.long).random_(5)

tensor([0, 1, 2])

In [420]:
def test():
    loss_func = nn.NLLLoss()
    return loss_func(torch.ones(3,1).long(),torch.zeros(3,1))

In [421]:
test()

RuntimeError: _thnn_nll_loss_forward not supported on CPUType for Long