In [None]:
# Define the network
import torch.nn as nn
import torch.nn.functional as F

# ConvNN1: 1 conv + 1 average pooling + 2 linear layers
# parameters in the network:
# conv1.weight
# conv1.bias
# fc1.weight
# fc1.bias
# fc2.weight
# fc2.bias
class ConvNet_1(nn.Module):
    """Small CNN: conv -> ReLU -> 2x2 average pooling -> fc -> ReLU -> fc.

    The flatten step is sized for single-channel 28x28 inputs (3 * 14 * 14
    features after pooling). Registered parameters, in state_dict order:
    conv1.weight, conv1.bias, fc1.weight, fc1.bias, fc2.weight, fc2.bias.
    """

    def __init__(self):
        super().__init__()
        # 1 input channel -> 3 feature maps. A 3x3 kernel with padding=1 and
        # the default stride of 1 keeps height and width unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, padding=1)

        # 2x2 window, stride 2: averages each window and halves height/width
        # (28x28 -> 14x14).
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

        # in_features = channels * height * width after pooling.
        self.fc1 = nn.Linear(in_features=3 * 14 * 14, out_features=100)
        self.fc2 = nn.Linear(in_features=100, out_features=10)

    def forward(self, x):
        """Return the 10 raw (un-normalised) class scores per flattened row."""
        features = self.pool(F.relu(self.conv1(x)))
        flat = features.view(-1, 3 * 14 * 14)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
    
    
class ConvNet_2(nn.Module):
    """Two-stage CNN: (conv -> ReLU -> 2x2 average pooling) x 2, then three linear layers.

    The flatten step is sized for single-channel 28x28 inputs: the two pooling
    stages reduce 28x28 -> 14x14 -> 7x7, giving 16 * 7 * 7 features.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 3, padding=1)
        self.conv2 = nn.Conv2d(6, 16, 3, padding=1)
        # One parameter-free pooling layer is shared by both stages; the
        # original assigned the same layer to self.pool twice.
        self.pool = nn.AvgPool2d(2, 2)
        # in_features = channels * height * width after the second pooling.
        self.fc1 = nn.Linear(16 * 7 * 7, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 raw (un-normalised) class scores per flattened row."""
        x = self.pool(F.relu(self.conv1(x)))  # 28x28 -> 14x14
        x = self.pool(F.relu(self.conv2(x)))  # 14x14 -> 7x7
        x = x.view(-1, 16 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


#### initialisation sample   

def weights_init(m):
    """Re-initialise one module in place (usable as ``net.apply(weights_init)``).

    Prints the module's class name. If that name contains 'Conv2d' or
    'Linear', the weights are drawn from a normal distribution with mean 0
    and std 1, and the bias (the ``b`` in ``wx + b``) is set to 0. Any other
    module is left untouched.
    """
    classname = m.__class__.__name__
    print(classname)
    if 'Conv2d' in classname or 'Linear' in classname:
        nn.init.normal_(m.weight.data, 0.0, 1.0)
        # Layers created with bias=False expose `bias` as None; skip them
        # instead of failing on `None.data`.
        if getattr(m, 'bias', None) is not None:
            nn.init.constant_(m.bias.data, 0.0)

# net = Net()
    
# print(net)
#net.apply(weights_init)

In [None]:
import torch.optim as optim
import numpy as np

# train a network for given epochs
def train(net, max_epoch, loader=None, lr=0.001, momentum=0.9, log_every=400):
    """Train ``net`` in place with SGD and cross-entropy loss.

    Args:
        net: module to optimise; called as ``net(inputs)``.
        max_epoch: number of full passes over the loader.
        loader: iterable yielding ``(inputs, labels)`` batches. Defaults to
            the notebook-level ``trainloader``.
        lr: SGD learning rate.
        momentum: SGD momentum.
        log_every: print the mean loss after every ``log_every`` mini-batches.

    Raises:
        ValueError: if ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be >= 1, got %r" % (log_every,))
    if loader is None:
        loader = trainloader

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

    for epoch in range(max_epoch):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(loader):
            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()   # gradients for every parameter (conv and linear)
            optimizer.step()  # apply one SGD-with-momentum update

            # print statistics
            running_loss += loss.item()
            if i % log_every == log_every - 1:
                # Average over the number of batches actually accumulated;
                # the original divided by 2000 while logging every 400.
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / log_every))
                running_loss = 0.0

    print('Finished Training')
    
    
# evaluate the accuracy of a net on the 10000 test images
def eval(net, loader=None):
    """Measure classification accuracy of ``net`` and return its predictions.

    Note: this function shadows Python's built-in ``eval`` within the notebook.

    Args:
        net: callable mapping a batch of images to per-class scores.
        loader: iterable yielding ``(images, labels)`` batches. Defaults to
            the notebook-level ``testloader``.

    Returns:
        1-D float64 NumPy array holding the predicted class index for every
        sample, in loader order (empty if the loader yields nothing).
        The float dtype matches what the previous implementation returned.
    """
    if loader is None:
        loader = testloader

    correct = 0
    total = 0
    batch_predictions = []
    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(loader):
            if batch_idx == 0:
                # Show the batch layout once, for the first batch only.
                print(images.shape)
                print('------------------------')
                print(labels.shape)
            outputs = net(images)
            # Index of the highest score along dim 1 is the predicted class.
            _, predicted = torch.max(outputs, 1)
            batch_predictions.append(predicted.cpu().numpy())
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated: avoid dividing by zero below.
        print('No test images were provided')
        return np.zeros(0, dtype=np.float64)

    print('Accuracy of the network on the %d test images: %d %%' % (
        total, 100 * correct / total))

    # Concatenate once at the end instead of re-copying the array per batch.
    return np.concatenate(batch_predictions).astype(np.float64)

In [None]:
# save weights of a net into files

def save_weights(net_name, layers, num_imgs, all_predicted, model=None):
    """Save a network's state dict and export it, with sample test images, as JSON.

    Writes the state dict to ``.././datasets/nets/<net_name>.pkl`` and then,
    for each of the first ``num_imgs`` images of ``testloader.dataset``, one
    file ``.././datasets/<net_name>/<index>-<label>-<predicted>.json``
    containing the keys ``weights``, ``biases``, ``layers``, ``input``,
    ``label`` and ``predictedLabel``. Missing output directories are created.

    Args:
        net_name: base name used for the ``.pkl`` file and the JSON directory.
        layers: layer sizes ``[input, hidden layers, ..., output]``; stored
            verbatim under the ``layers`` key.
        num_imgs: number of leading test images to export.
        all_predicted: predicted label per test image, indexable by image
            position (e.g. the array returned by ``eval``).
        model: network to export. Defaults to the notebook-level ``net``.
    """
    import json
    import os

    if model is None:
        model = net

    os.makedirs(".././datasets/nets", exist_ok=True)
    os.makedirs(".././datasets/%s" % net_name, exist_ok=True)

    state = model.state_dict()
    torch.save(state, ".././datasets/nets/%s.pkl" % net_name)

    dataset = testloader.dataset
    # `test_data` / `test_labels` are the older torchvision names for
    # `data` / `targets`; prefer the current names when they exist.
    all_images = dataset.data if hasattr(dataset, "data") else dataset.test_data
    label_store = dataset.targets if hasattr(dataset, "targets") else dataset.test_labels
    # Convert the labels once rather than on every loop iteration.
    all_true_labels = label_store.tolist() if hasattr(label_store, "tolist") else list(label_store)

    conv_weights, conv_biases = {}, {}
    fc_weights, fc_biases = {}, {}

    # Layers are numbered from 1 per kind; the counter advances on the bias
    # entry, which follows the weight entry of the same layer in the state dict.
    conv_idx = 1
    fc_idx = 1
    for param_name, tensor in state.items():
        values = tensor.tolist()
        # for conv layers
        if 'conv' in param_name:
            if 'weight' in param_name:
                # NOTE(review): tensor4dToList3d keeps only input channel 0,
                # so for a conv layer with several input channels (e.g. conv2
                # of ConvNet_2) the other kernels are not exported -- confirm
                # this is what the JSON consumer expects.
                conv_weights[str(conv_idx)] = tensor4dToList3d(values)
            if 'bias' in param_name:
                conv_biases[str(conv_idx)] = values
                conv_idx += 1
        # for fully connected layers
        if 'fc' in param_name:
            if 'weight' in param_name:
                # Stored transposed: [in_features][out_features].
                fc_weights[str(fc_idx)] = [list(column) for column in zip(*values)]
            if 'bias' in param_name:
                fc_biases[str(fc_idx)] = values
                fc_idx += 1

    output = {
        'weights': {'conv': conv_weights, 'fc': fc_weights},
        'biases': {'conv': conv_biases, 'fc': fc_biases},
        'layers': layers,  # [input, hidden layers, ..., output]
    }

    for img in range(num_imgs):
        output['input'] = all_images[img].flatten().tolist()
        output['label'] = all_true_labels[img]
        # A plain float keeps json.dump working for any NumPy scalar type and
        # serialises exactly like the float64 values returned by `eval`.
        output['predictedLabel'] = float(all_predicted[img])

        path = '.././datasets/%s/%s-%s-%s.json' % (
            net_name, img, output['label'], int(output['predictedLabel']))
        with open(path, 'w') as f:
            json.dump(output, f)

    print("-- Written to file")
    
# convert a [x, 1, y, z] list to [x, y, z] list
def tensor4dToList3d(a):
    """Collapse axis 1 of a nested ``[x, 1, y, z]`` list, giving ``[x, y, z]``.

    Only index 0 along axis 1 is kept; if that axis holds more than one
    entry, the remaining entries are dropped.
    """
    outer_len = np.shape(a)[0]
    return [a[idx][0] for idx in range(outer_len)]
    

In [None]:
# Step 1 Load data
import torch
import torchvision
import torchvision.transforms as transforms

batch_size = 64

# Load MNIST. ToTensor() is the only transform applied, so there is no
# separate normalisation step.
transform = transforms.Compose([transforms.ToTensor()])

# Training split: shuffled on every pass.
trainset = torchvision.datasets.MNIST(
    root='.././data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=1)

# Test split: fixed order, so predictions line up with dataset indices.
testset = torchvision.datasets.MNIST(
    root='.././data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=1)

# Class names: the ten digits '0'..'9' as strings.
classes = tuple(str(digit) for digit in range(10))


In [None]:
# Step 2 select a network and train it for 50 epochs

# net = ConvNet_1()
# 97%

# ConvNet_2 (the author's note for this network: 98 %)
net = ConvNet_2()
train(net, max_epoch=50)

# Step 3: test accuracy; keep the per-image predictions for the export below
all_predicted = eval(net)

# Step 4: save the net into files
# Equivalent call for ConvNet_1:
# save_weights("convnn1",  [784, 2352, 588, 100, 10], 1, all_predicted)
save_weights(
    net_name="convnn2",
    # Layer sizes written as products: 784, 4704, 1176, 3136, 784, 120, 84, 10
    layers=[28*28, 28*28*6, 14*14*6, 14*14*16, 7*7*16, 120, 84, 10],
    num_imgs=1,
    all_predicted=all_predicted,
)


In [None]:
# to load pre-trained weights

import torch

# Build the architecture first, then fill in its parameters from disk.
# The class must match the checkpoint being loaded.
net = ConvNet_1()
# net = ConvNet_2()

# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source. Presumably this file was written by save_weights.
net.load_state_dict(state_dict=torch.load("../datasets/nets/convnn1.pkl"))

# # train and test
# train(net, 50)
# all_predicted = eval(net)

# save weights