# Training the 2L-SPC on the MNIST database
Paper: https://arxiv.org/abs/2002.00892

In [1]:
import sys
from pathlib import Path

sys.path.append(str((Path("..").resolve().absolute())))

from  SPC_2L.DataTools import DataBase
from SPC_2L.Network import LayerPC, Network
from SPC_2L.Coding import ML_Lasso,ML_FISTA
from SPC_2L.DataTools import DataBase, gaussian_kernel
from SPC_2L.Monitor import Monitor
from SPC_2L.Optimizers import mySGD, myAdam
import torch.nn.functional as f
import torch.nn as nn
import torch
import time
import tensorboardX
from SPC_2L.DataTools import LCN, whitening, z_score, mask, to_cuda, norm
from torchvision.utils import make_grid
import numpy as np
from tensorboardX import SummaryWriter
import pickle
from torchvision.transforms import ToTensor, Compose, Resize
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.optim as optim
#from SDPC.Monitor import Monitor
import math
from torchvision.datasets import MNIST
import json


## Tools, parameters and database

In [2]:
def imshow(img):
    """Display a channel-first image with matplotlib.

    The input is rescaled with ``img / 2 + 0.5`` (the "unnormalize" step),
    converted through its ``.numpy()`` method, and its axes are reordered
    from (0, 1, 2) to (1, 2, 0) before being handed to ``plt.imshow``.

    NOTE(review): ``plt`` is not imported in the visible cells of this
    notebook -- confirm that ``matplotlib.pyplot`` is available as ``plt``
    before calling this helper.
    """
    # Undo the normalisation; presumably maps [-1, 1] back to [0, 1] -- TODO confirm.
    rescaled = img / 2 + 0.5
    pixels = rescaled.numpy()
    # Move the leading axis to the end so the array is laid out as plt.imshow expects.
    channels_last = np.transpose(pixels, axes=(1, 2, 0))
    plt.imshow(channels_last)
    plt.show()

In [3]:

## Loading parameters
# Read the experiment configuration. The context manager closes the file
# handle as soon as the JSON has been parsed (the handle was previously left open).
with open('config.json') as file:
    config = json.load(file)

## Database 
data_path = '../../DataSet/MNIST/'

# Optional preprocessing steps. Each one is instantiated only when its
# "Use_*" flag is set in the "Transforms" section of the config; otherwise
# the name is bound to None and the step is dropped from the pipeline below.
transforms_cfg = config['Transforms']
resize = Resize(tuple(transforms_cfg['Resize_Size'])) if transforms_cfg['Use_Resize'] else None
whiten = whitening(tuple(transforms_cfg['Whitening_Size']), f_0=transforms_cfg['Whitening_f_0']) if transforms_cfg['Use_Whitening'] else None
lcn = LCN(transforms_cfg['LCN_Kernel_Size'], transforms_cfg['LCN_Sigma'], transforms_cfg['LCN_RGB']) if transforms_cfg['Use_LCN'] else None
zscore = z_score() if transforms_cfg['Use_z_score'] else None
Mask = mask(tuple(transforms_cfg['Mask_Size'])) if transforms_cfg['Use_Mask'] else None

# ToTensor and to_cuda are always applied first; the enabled optional steps
# follow in this fixed order.
transform_list = [transform for transform in [ToTensor(), to_cuda(), resize, whiten, lcn, zscore, Mask] if transform is not None]
transform = Compose(transform_list)

# MNIST training split, downloaded into data_path when it is not already there.
dataset = MNIST(data_path, transform=transform, train=True, download=True)

# NOTE(review): this rebinds the name `DataBase`, which the import cell also
# binds to SPC_2L.DataTools.DataBase; from here on it refers to the loader.
DataBase =  DataLoader(dataset,batch_size=32,shuffle=True,drop_last=True)

## Gaussian masks for the dictionaries
#if config.getboolean('Gaussian', 'Use_Gaussian'):
    #mask_g = [gaussian_kernel((), sigma=config.getint('Gaussian', 'Gaussian_Sigma').split(", ")[i]))]
    #mask_g = [gaussian_kernel((64,3,8,8), sigma=30), gaussian_kernel((128,64,8,8), sigma=30)]



## Training and saving the network (b=0 for Hi-La, b=1 for 2L-SPC)

In [4]:

## Definition of the layers, network and sparse coding algorithm
# One LayerPC is built per layer index i. Its first argument is the 4-tuple
# (Num_Features[i+1], Num_Features[i], Dico_Shape[0], Dico_Shape[1]) --
# presumably (output features, input features, kernel height, kernel width);
# TODO confirm against LayerPC. `b` receives the "Feedback_Str" setting
# (per the section heading: b=0 for Hi-La, b=1 for 2L-SPC).
layer = [LayerPC((config["Network"]["Num_Features"][i+1], config["Network"]["Num_Features"][i], config["Params"]["Dico_Shape"][0], config["Params"]["Dico_Shape"][1]),
                  stride=config["Params"]["Stride"][i], 
                  b=config["Network"]["Feedback_Str"], 
                  v=config["Network"]["Initial_Norm"][i], 
                  v_size=config["Params"]["v_size"][i], 
                  out_pad=config["Params"]["Out_Padding"][i]) 
                  for i in range (config["Network"]["Num_Layers"])]

# The network wraps the layers; the loss takes one sparsity parameter per layer;
# the pursuit object produces the codes used by the training loop below.
Net = Network(layer, input_size=(config["Network"]["Num_Features"][1], config["Network"]["Num_Features"][0], config["Params"]["Image_Size"][0], config["Params"]["Image_Size"][1]))
Loss = ML_Lasso(Net, [config["Network"]["Sparsity_Param"][i] for i in range(config["Network"]["Num_Layers"])])
Pursuit = ML_FISTA(Net, Loss, max_iter=1000, th=5e-4, mode='eigen')

# Optimizer initialization
# One mySGD optimizer per layer dictionary. The list is allocated with
# nb_layers + 1 slots but only the first nb_layers are filled, so the last
# entry stays None.
opt_dico = [None] * (Net.nb_layers + 1)
for i in range(0, Net.nb_layers):
    opt_dico[i] = mySGD([{'params': Net.layers[i].dico}], lr=config["Network"]["Dictionary_Lr"][i], momentum=0.9, normalize=True)

# One myAdam optimizer per layer for the `v` parameter.
opt_v = [myAdam([{'params': Net.layers[i].v}], lr=config["Network"]["Normalizer_Lr"][i], normalize=False) for i in range(Net.nb_layers)]


# Per-layer scratch lists: dictionary loss, `v` loss and reconstructions.
L = [None] * (Net.nb_layers)
L_v = [None] * (Net.nb_layers)
reco = [None] * (Net.nb_layers)

# The model name embeds the first two sparsity parameters and the feedback
# strength; it names both the pickle file and the TensorBoard log directory.
model_name = 'MNIST_[{0},{1}]_b={2}'.format(config["Network"]["Sparsity_Param"][0], config["Network"]["Sparsity_Param"][1], config["Network"]["Feedback_Str"])
path = 'Savings/MNIST/' + model_name +'.pkl'
# `writer` and `M` exist only when "Use_Tensorb" is set; every later use is
# guarded by the same flag.
if config["Params"]["Use_Tensorb"]:
    nrows = [8,8,8,8,8,8,8]
    writer = SummaryWriter('Savings/Log/' + model_name)
    M = Monitor(Net, writer, n_row=nrows)

# Global step counter, incremented once per processed batch.
k=0

# Loss history buffers with one column per training step
# (Num_Epoch * number of batches).
# NOTE(review): the row count is hard-coded to 2 while rows are indexed by
# layer below -- confirm that Net.nb_layers never exceeds 2.
l2_loss = torch.zeros(2, config["Params"]["Num_Epoch"] * len(DataBase))
l1_loss = torch.zeros(2, config["Params"]["Num_Epoch"] * len(DataBase))
# When "Save" is set the network is trained from scratch; otherwise a
# previously pickled result is loaded from `path`.
if config["Params"]["Save"]: 
    for e in range(config["Params"]["Num_Epoch"]):
        for idx_batch, data in enumerate(DataBase):

            # data[0] is the image batch; the second element of `data` is not used.
            batch = data[0].cuda()
            # gamma is indexed per layer below; `it` is logged as 'FISTA_iterations'.
            gamma, it, Loss_G, delta = Pursuit.coding(batch)


            # Dictionary update: gradient tracking on each dictionary is enabled
            # only while its loss is built and back-propagated, then disabled
            # again before the optimizer step.
            for i in range(Net.nb_layers):
                Net.layers[i].dico.requires_grad = True
                L[i] = Loss.F(batch, gamma, i, do_feedback=False).div(batch.size()[0])  ## Unsupervised
                L[i].backward()
                Net.layers[i].dico.requires_grad = False
                opt_dico[i].step()
                opt_dico[i].zero_grad()
                
                # Record the batch-averaged loss and the sum of gamma[i]
                # divided by its first dimension for this step.
                l2_loss[i,k]= L[i].detach()
                l1_loss[i,k] =  gamma[i].detach().sum().div(gamma[i].size(0))
                

            # `v` update: same enable / backward / disable / step pattern,
            # using Loss.F_v and the per-layer myAdam optimizers.
            for i in range(Net.nb_layers):
                Net.layers[i].v.requires_grad = True  # turn_on(i)
                L_v[i] = Loss.F_v(batch, gamma, i).div(batch.size()[0])
                L_v[i].backward()
                Net.layers[i].v.requires_grad = False  # turn_off(i)
                opt_v[i].step()  
                opt_v[i].zero_grad()
                
            if config["Params"]["Use_Tensorb"]:
                # Scalar and monitor logging every 10 steps.
                if (k%10) == 0:
                    writer.add_scalar('FISTA_iterations', it, k)
                    M.MonitorGamma(gamma, k, option=['NNZ', '%', 'Sum', 'V'])
                    M.MonitorList(L, 'Loss_Dico', k)
                    M.MonitorList(L_v, 'Loss_v', k)
                    M.MonitorDicoBP(k)
                    M.ComputeHisto(gamma)

                # Reconstruction images every 100 steps: for each layer i, start
                # from gamma[i] and apply layers i, i-1, ..., 0 via their
                # `backward` method, then log the result as an image grid.
                if (k%100) == 0:
                    reco = [None] * (Net.nb_layers)
                    for i in range(Net.nb_layers-1,-1,-1):
                        reco[i] = gamma[i]
                        for j in range(i, -1, -1):
                            reco[i] = Net.layers[j].backward(reco[i])
                        reco_image = make_grid(reco[i],normalize=True,pad_value=1)
                        writer.add_image('Reco/L{0}'.format(i),reco_image,k)

            k += 1

    # Bundle everything that a later cell pickles to disk.
    output_exp = {'Net': Net,
            'Loss': Loss,
            'Pursuit': Pursuit,
            'l2_loss': l2_loss,
            'l1_loss': l1_loss    
                 }
    
    
else :        
    # NOTE(review): pickle.load executes arbitrary code from the file -- only
    # load pickles produced by a trusted run of this notebook.
    # NOTE(review): only `output_exp` is rebound here; Net, Loss and Pursuit
    # remain the freshly constructed objects from above -- confirm this is intended.
    with open(path, 'rb') as file:
        output_exp = pickle.load(file)
 


NETWORK STRUCTURE : 
 Input : (32, 1, 28, 28)
 Layer 1 : [32, 32, 12, 12]
 Layer 2 : [32, 64, 8, 8]


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at C:\cb\pytorch_1000000000000\work\torch\csrc\utils\python_arg_parser.cpp:1519.)
  p.data.add_(-group['lr'], d_p)


KeyboardInterrupt: 

In [4]:
# Pickle `output_exp` to Savings/MNIST/<model_name>.pkl.
path = 'Savings/MNIST/' + model_name +'.pkl'
# Create the target directory on first use so that opening the file for
# writing does not fail with FileNotFoundError on a fresh checkout.
Path(path).parent.mkdir(parents=True, exist_ok=True)
with open(path, 'wb') as file:
    pickle.dump(output_exp, file, pickle.HIGHEST_PROTOCOL)

In [None]:
# Run the same per-batch procedure as the training cell on the MNIST test split.
# NOTE(review): this rebinds `dataset`, which previously referred to the
# training split.
dataset = MNIST(data_path, transform=transform, train=False, download=True)

DataBaseT =  DataLoader(dataset,batch_size=32,shuffle=True,drop_last=True)


# NOTE(review): `k`, `l2_loss` and `l1_loss` are carried over from the training
# cell. The buffers were allocated with Num_Epoch * len(DataBase) columns, so if
# training ran to completion the index `k` used below is already past the last
# column -- confirm whether separate test buffers / a reset counter are intended.
for idx_batch, data in enumerate(DataBaseT):

        # data[0] is the image batch; the second element of `data` is not used.
        batch = data[0].cuda()
        gamma, it, Loss_G, delta = Pursuit.coding(batch)


        for i in range(Net.nb_layers):
            Net.layers[i].dico.requires_grad = True
            L[i] = Loss.F(batch, gamma, i, do_feedback=False).div(batch.size()[0])  ## Unsupervised
            # NOTE(review): unlike the training cell, requires_grad is switched
            # off BEFORE backward() here -- confirm whether the dictionary is
            # meant to receive gradients (and be updated) on test data.
            Net.layers[i].dico.requires_grad = False
            L[i].backward()
            opt_dico[i].step()
            opt_dico[i].zero_grad()
            
            # Record the batch-averaged loss and the sum of gamma[i] divided by
            # its first dimension at step k.
            l2_loss[i,k]= L[i].detach() 
            l1_loss[i,k] =  gamma[i].detach().sum().div(gamma[i].size(0))
                

        # NOTE(review): the `v` optimizers are stepped on test batches here --
        # confirm that updating parameters during evaluation is intended.
        for i in range(Net.nb_layers):
            Net.layers[i].v.requires_grad = True  # turn_on(i)
            L_v[i] = Loss.F_v(batch, gamma, i).div(batch.size()[0])
            L_v[i].backward()
            Net.layers[i].v.requires_grad = False  # turn_off(i)
            opt_v[i].step()  
            opt_v[i].zero_grad()
                
        # `writer` and `M` are created in the training cell only when this flag is set.
        if config["Params"]["Use_Tensorb"]:
            # Scalar and monitor logging every 10 steps.
            if (k%10) == 0:
                writer.add_scalar('FISTA_iterations', it, k)
                M.MonitorGamma(gamma, k, option=['NNZ', '%', 'Sum', 'V'])
                M.MonitorList(L, 'Loss_Dico', k)
                M.MonitorList(L_v, 'Loss_v', k)
                M.MonitorDicoBP(k)
                M.ComputeHisto(gamma)

            # Reconstruction images every 100 steps: for each layer i, start from
            # gamma[i] and apply layers i, i-1, ..., 0 via their `backward`
            # method, then log the result as an image grid.
            if (k%100) == 0:
                reco = [None] * (Net.nb_layers)
                for i in range(Net.nb_layers-1,-1,-1):
                    reco[i] = gamma[i]
                    for j in range(i, -1, -1):
                        reco[i] = Net.layers[j].backward(reco[i])
                    reco_image = make_grid(reco[i],normalize=True,pad_value=1)
                    writer.add_image('Reco/L{0}'.format(i),reco_image,k)
                    
        k += 1