In [35]:
# Define the network
import torch.nn as nn
import torch.nn.functional as F

# DNN1: 8+8+8 internal units in 3 hidden layers
class Net_8_8_8(nn.Module):
    def __init__(self):
        super(Net_8_8_8, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 8)
        self.fc2 = nn.Linear(8, 8)
        self.fc3 = nn.Linear(8, 8)
        self.fc4 = nn.Linear(8, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28) # flatten 
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x
    
# DNN3: 20+10+8+8 internal units in 4 hidden layers
class Net_20_10_8_8(nn.Module):
    def __init__(self):
        super(Net_20_10_8_8, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 8)
        self.fc4 = nn.Linear(8, 8)
        self.fc5 = nn.Linear(8, 10)
        
    def forward(self, x):
        x = x.view(-1, 28 * 28) # flatten
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x

# DNN5: 20+20+10+10+10 internal units in 5 hidden layers
class Net_20_20_10_10_10(nn.Module):
    def __init__(self):
        super(Net_20_20_10_10_10, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 20)
        self.fc2 = nn.Linear(20, 20)
        self.fc3 = nn.Linear(20, 10)
        self.fc4 = nn.Linear(10, 10)
        self.fc5 = nn.Linear(10, 10)
        self.fc6 = nn.Linear(10, 10)
        
    def forward(self, x):
        x = x.view(-1, 28 * 28) # flatten
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc5(x))
        x = self.fc6(x)
        return x
    
#### initialisation sample   

def weights_init(m):
    classname = m.__class__.__name__
    print(classname)
    if classname.find('Conv2d') != -1:
        #print(m.weight.data.shape, m.bias.data.shape)
        nn.init.normal_(m.weight.data, 0.0, 1.0)
        nn.init.constant_(m.bias.data, 0.0)
        # bias is the term in wx + b
    elif classname.find('Linear') != -1:
        nn.init.normal_(m.weight.data, 0.0, 1.0)
        nn.init.constant_(m.bias.data, 0.0)

# net = Net()
    
# print(net)
#net.apply(weights_init)

In [36]:
import torch.optim as optim
import numpy as np

def train(net, max_epoch):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(max_epoch):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward() # backpropagation of the weights of linear layers as well as the convolutional layers
            optimizer.step() # dynamic learning rate

            # print statistics
            running_loss += loss.item()
            if i % 400 == 399:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    print('Finished Training')

In [37]:
def eval(net):
    correct = 0
    total = 0
    flag = True
    all_predicted = []
    all_outputs = np.zeros((10000, 10))
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            if (flag):
    #             print(images)
                single_input = images
                print(images.shape)
                print('------------------------')
                print(labels.shape)
                lable = labels
                flag = False
            outputs = net(images)
            #from pdb import set_trace

            #print(outputs)
            #set_trace()
            all_outputs[len(all_predicted):len(all_predicted)+batch_size] = outputs
            #set_trace()
            _, predicted = torch.max(outputs.data, 1)
            all_predicted = np.concatenate((all_predicted, predicted))
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))
    
    return all_predicted

In [38]:
# Step 1 Load data
import torch
import torchvision
import torchvision.transforms as transforms

batch_size = 64

# load and normalize MNIST
transform = transforms.Compose(
    [transforms.ToTensor()])#,
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=1)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=1)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')


In [41]:
def save_weights(net_name, layers, num_imgs, all_predicted):
    torch.save(net.state_dict(), "./datasets/%s.pkl" % net_name )
    all_images = testloader.dataset.test_data
    all_true_labels = testloader.dataset.test_labels
    
    # save parameters as json file
    # {
    #   "layers": [2,2,1],
    #   "input": [0.0, 0.0],
    #   "label": 1,
    #   "weights": {
    #     "1": [[1.0,1.0],[1.0,1.0]],
    #     "2": [[1.0], [-2.0]]
    #   },
    #   "biases": {
    #     "1": [[0.0], [-1.0]],
    #     "2": [[0.0]]
    #   }
    # }
    import json

    output = {}
    output_weights = {}
    output_biases = {}

    i = 1
    for param_tensor in net.state_dict():
        if ('weight' in param_tensor):
            output_weights[str(i)] =  net.state_dict()[param_tensor].tolist()
            _T = [list(i) for i in zip(*output_weights[str(i)])]
            output_weights[str(i)] = _T
        if ('bias' in param_tensor):
            output_biases[str(i)] = net.state_dict()[param_tensor].tolist()
            i += 1
    output['weights'] = output_weights
    output['biases'] = output_biases

    for img in range(num_imgs):
        output['layers'] = layers # [input, hidden layers, ..., output]
        output['input'] = all_images[img].flatten().tolist()
        output['label'] = all_true_labels.tolist()[img]
        output['predictedLabel'] = all_predicted[img]
#         from pdb import set_trace

        with open('./datasets/%s/%s-%s-%s.json' % (net_name, img, output['label'], int(output['predictedLabel'])), 'w') as f:
#             set_trace()
            json.dump(output, f)
    
    print("-- Wrote to file")
    

In [None]:
# Step 2 select a network and train it for 30 epoches

net = Net_8_8_8()
# net = Net_20_10_8_8()
# net = Net_20_20_10_10_10()

train(net, 50)

# Step 3 test accuracy
all_predicted = eval(net)

# Step 4 save the net into file
save_weights("nn1",  [784, 8, 8, 8, 10], 100, all_predicted)
# "nn1",  [784, 8, 8, 8, 10]
# "nn3", [784, 20, 10, 8, 8, 10] 95%
# "nn5", [784, 20, 20, 10, 10, 10, 10] 95%

[1,   400] loss: 0.462
[1,   800] loss: 0.448
[2,   400] loss: 0.388
[2,   800] loss: 0.339
[3,   400] loss: 0.269
[3,   800] loss: 0.228
[4,   400] loss: 0.201
[4,   800] loss: 0.185
[5,   400] loss: 0.161
[5,   800] loss: 0.142
[6,   400] loss: 0.133
[6,   800] loss: 0.127
[7,   400] loss: 0.121
[7,   800] loss: 0.118
[8,   400] loss: 0.113
[8,   800] loss: 0.110
[9,   400] loss: 0.108
[9,   800] loss: 0.102
[10,   400] loss: 0.102
[10,   800] loss: 0.098
[11,   400] loss: 0.093
[11,   800] loss: 0.096
[12,   400] loss: 0.092
[12,   800] loss: 0.089
[13,   400] loss: 0.087
[13,   800] loss: 0.086
[14,   400] loss: 0.084
[14,   800] loss: 0.083
[15,   400] loss: 0.081
[15,   800] loss: 0.078
[16,   400] loss: 0.079
[16,   800] loss: 0.073
[17,   400] loss: 0.074
[17,   800] loss: 0.075
[18,   400] loss: 0.071
[18,   800] loss: 0.073
[19,   400] loss: 0.069
[19,   800] loss: 0.070
[20,   400] loss: 0.067
[20,   800] loss: 0.067
[21,   400] loss: 0.066
[21,   800] loss: 0.065
[22,   400