In [16]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import torch.nn.functional as F
import numpy as np

## Saving a classifier

In this notebook, we will use the classifier that you built in p1.

Hence, first go to that notebook and _export_ the classifier you built there, by adding the following code in that notebook:


In [None]:
#torch.save(model.state_dict(), name)

In [4]:
class LeNet(nn.Module):
    def __init__(self, calibrated=False):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5, 1, padding=2)
        self.conv2 = nn.Conv2d(6, 16, 5, 1)
        self.fc1 = nn.Linear(5*5*16, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84,10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 5*5*16) 
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)



## Loading a pre-trained classifier

Now, we can load that pre-trained classifier in this notebook as follows:

In [8]:
# TODO: Maxime?
def load_clf(clf_classname, path):
    net = clf_classname()
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    net.load_state_dict(state_dict)
    return net

model = load_clf(LeNet, 'LeNet.pt')

In [33]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                              ])

# Download and load the training data
trainset = datasets.MNIST('../SPO_Semloss/data/VizSudoku', download=False, train=True, transform=transform)
testset = datasets.MNIST('../SPO_Semloss/data/VizSudoku', download=False, train=False, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=True)

## Recap: solving a sudoku based on the predictions

In the following, we repeat the code of the previous notebook for sampling a sudoku and getting predictions.

We also included example _ortools_ code that solves the sudoku problem _(requires to install ortools, e.g. conda install ortools)_

In [39]:
# image indices per label
mnist_indices_train = {k:np.where(trainset.targets == k) for k in range(10)}
mnist_indices_test = {k:np.where(testset.targets == k) for k in range(10)}
# image tensor per label
mnist_images_train = {k:trainset.data[mnist_indices_train[k]] for k in range(10)}
mnist_images_test = {k:trainset.data[mnist_indices_test[k]] for k in range(10)}
# image supply per label 
mnist_supply_train = {k:len(v[0]) for k,v in mnist_indices_train.items()}
mnist_supply_test = {k:len(v[0]) for k,v in mnist_indices_test.items()}

def sample_visual_sudoku(sudoku_p, is_trainset=True):
    mnist_indices = mnist_images_train if is_trainset else mnist_images_test
    mnist_images = mnist_images_train if is_trainset else mnist_images_test
    mnist_supply = mnist_supply_train if is_trainset else mnist_supply_test
    # parse current sudoku puzzle to save locations for each label
    is_given = np.ma.masked_not_equal(sudoku_p, 0).mask
    need4digit = {k:len(np.where(sudoku_p == k)[0]) for k in range(10)}
    digits_indices = {k:np.where(sudoku_p == k) for k in range(10)}
    # Visual Sudoku container
    sudoku_v = torch.zeros(9,9,28,28, dtype=torch.uint8)
    # fill in the board label per label
    for k in range(1,10):
        # sample label-k images from MNIST
        idx = np.random.choice(np.arange(mnist_supply[k]), size=need4digit[k], replace=False)
        # put images in container
        sudoku_v[digits_indices[k]] = mnist_images[k][idx]
    
    return sudoku_v.float()

def predict_sudoku(model, vizsudoku):
    return model(vizsudoku.flatten(0,1))

vsudoku = sample_visual_sudoku(sudoku_p0)
preds = predict_sudoku(model,vsudoku )


RuntimeError: Expected 4-dimensional input for 4-dimensional weight 6 1 5 5, but got 3-dimensional input of size [81, 28, 28] instead

In [17]:
# sudoku's, from http://hakank.org/minizinc/sudoku_problems2/index.html

sudoku_p0 = torch.IntTensor([[0,0,0, 2,0,5, 0,0,0],
                             [0,9,0, 0,0,0, 7,3,0],
                             [0,0,2, 0,0,9, 0,6,0],
                             [2,0,0, 0,0,0, 4,0,9],
                             [0,0,0, 0,7,0, 0,0,0],
                             [6,0,9, 0,0,0, 0,0,1],
                             [0,8,0, 4,0,0, 1,0,0],
                             [0,6,3, 0,0,0, 0,8,0],
                             [0,0,0, 6,0,8, 0,0,0]])

# sample a dataset index with that value/label
def sample_by_label(dataset, value):
    # primitive but it works...
    idxs = torch.randperm(len(dataset))
    for idx in idxs:
        if dataset.targets[idx] == value:
            return idx
# sample a dataset index for each non-zero number
def sample_visual_sudoku(sudoku_p, dataset):
    nonzero = sudoku_p > 0
    vizsudoku = -torch.ones(sudoku_p.shape, dtype=torch.long)
    vizsudoku[nonzero] = torch.LongTensor([sample_by_label(trainset, value) for value in sudoku_p[nonzero]])
    return vizsudoku
# get predictions
def predict_sudoku(model, vizsudoku_idx, dataset):
    nonzero = vizsudoku_idx > -1
    predsudoku = torch.zeros(vizsudoku_idx.shape, dtype=torch.int32)
    images = trainset.data[vizsudoku_idx[nonzero]]
    images = images.view(images.shape[0], -1).type(torch.float)
    # images.shape (23,784)
    with torch.no_grad():
        probs = model(images).exp()
        preds = torch.argmax(probs, dim=1)
        predsudoku[nonzero] = preds
    return predsudoku.reshape((9,9))

vizsudoku_idx = sample_visual_sudoku(sudoku_p0, trainset)
preds = predict_sudoku(model, vizsudoku_idx, trainset)

RuntimeError: Expected 4-dimensional input for 4-dimensional weight 6 1 5 5, but got 2-dimensional input of size [23, 784] instead

In [None]:
from ortools.sat.python import cp_model

# model and solve a sudoku with ortools
def model_sudoku_ort(grid):
        csp = cp_model.CpModel()

        # init vars
        board = [[csp.NewIntVar(1, 9, 'x_%i%i' % (i,j)) for j in range(9)] for i in range(9)]
        
        # assign knowns
        for i in range(9):
            for j in range(9):
                if preds[i,j] != 0:
                    csp.Add(board[i,j] == preds[i,j])
        
        # all different rows
        for i in range(9):
            csp.AddAlldifferent(board[i])
        
        # all different columns
        for j in range(9):
            csp.AddAlldifferent([board[i,j] for i in range(9)])
        
        # all different cells
        for si in range(3):
            for sj in range(3):
                csp.AddAlldifferent([board[3*si+i,3*sj+j] for j in range(3) for i in range(3)])
        
        return csp
def solve_sudoku_ort(grid):
    # the constraint model
    csp = model_sudoku_ort(grid)
    
    solver = cp_model.CpSolver()
    status = solver.solve(csp) # or similar?
        
    if status != None: # todo, actual status check
        return board # or its values

sol = solve_sudoku_ort(preds.tolist())
sol

## Finding the maximum likelihood solution

As errors in the output may lead to infeasible sudoku's, we are going to want to find the _maximum likelihood_ solution.

First, we read and store the prediction probabilities instead of the predictions. We obtain a 9x9x9 tensor (last dimension = probabilities of digit 1..9)


In [None]:
# get probabilities of predictions
def predict_proba_sudoku(model, vizsudoku_idx, dataset):
    nonzero = vizsudoku_idx > -1
    probsudoku = torch.zeros((9,9,9))
    images = trainset.data[vizsudoku_idx[nonzero]]
    images = images.view(images.shape[0], -1).type(torch.float)
    # images.shape (23,784)
    with torch.no_grad():
        probs = model(images).exp()
        predsudoku[nonzero] = probs
    return predsudoku.reshape((9,9,9))

predict_proba_sudoku(model, vizsudoku_idx, trainset)

## Maximum likelihood estimation with standard CP solver

We need to turn the _satisfaction_ problem of sudoku into an _optimisation_ problem, where we optimize for maximum log likelihood.

__Task: adapt the above code to find the maximum likelihood visual sudoku solution!__

This means adding the objective function: a weighted sum of the decision variables, with as weight the log-probability of that decision variable being equal to the corresponding predicted value.

E.g. \sum_i \sum_j \sum_c log(prob[i,j,c])*[V[i,j] == c]

Note that the only thing that changes is adding the objective, so you can reuse model_sudoku_ort() of an empty grid!!

In [None]:
def solve_vizsudoku_ort(probs):
    # the constraint model
    empty_grid = torch.zeros((9,9), dtype=torch.int).tolist()
    csp = model_sudoku_ort(empty_grid)
    
    # TODO: add the objective function!!
    
    solver = cp_model.CpSolver()
    status = solver.solve(csp) # or similar?
        
    if status != None: # todo, actual status check
        return board # or its values

probs = predict_proba_sudoku(model, vizsudoku_idx, trainset)
psol = solve_vizsudoku_ort(probs)
psol

Let's check what the error is now...

# Maxime, can you add visualizers? thx...
You will need to solve the true labels to get the full 'true' solution I think...