<!-- # Dictionary Learning

Solve orthogonal dictionary learning problem taken from: Bai, Yu, Qijia Jiang, and Ju Sun. "Subgradient descent learns orthogonal dictionaries." arXiv preprint arXiv:1810.10702 (2018). -->

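## Problem Description

Train an $\ell_1$-regularized multiclass logistic regression classifier on a single mini-batch of MNIST digits using NCVX. With $W$ and $b$ denoting the weight matrix and bias of the linear layer, the unconstrained problem solved below is

$$\min_{W,\, b} \; \mathrm{CrossEntropy}(W, b) + \lambda \|W\|_1, \qquad \lambda = 0.001,$$

where $\|W\|_1$ is the entrywise $\ell_1$ norm (sum of absolute values).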

<!-- Given data $\{y_i \}_{i \in[m]}$ generated as $y_i = A x_i$, where $A \in R^{n \times n}$ is a fixed unknown orthogonal matrix and each $x_i \in R^n$ is an iid Bernoulli-Gaussian random vector with parameter $\theta \in (0,1)$, recover $A$. 

Write $Y \doteq [y_1,...,y_m]$ and $X \doteq [x_1,...,x_m]$. To find the column of $A$, one can perform the following optimization:

$$\min_{q \in R^n} f(q) \doteq \frac{1}{m} ||q^T Y||_{1} = \frac{1}{m} \sum_{i=1}^m |q^T y_i|,$$
$$\text{s.t.} ||q||_2 = 1$$

This problem is nonconvex due to the constraint and nonsmooth due to the objective.

Based on the above statistical model, $q^T Y = q^T A X$ has the highest sparsity when $q$ is a column of $A$ (up to sign) so that $q^T A$ is 1-sparse.  -->

## Modules Importing
Import all necessary modules and add NCVX src folder to system path.

In [1]:
import time
import numpy as np
import torch
import numpy.linalg as la
from scipy.stats import norm
import sys
## Adding NCVX directories. Should be modified by user
sys.path.append('/home/buyun/Documents/GitHub/NCVX')
from ncvx import ncvx
from ncvxStruct import Options, GeneralStruct 
from torchvision import datasets
from torchvision.transforms import ToTensor
from private.getNvar import getNvarTorch
import torch.nn as nn


## Data Generation 
Specify torch device, model class, and generate data.

NOTE: please specify path for downloading data.

Use GPU for this problem. If no cuda device available, please set *device = torch.device('cpu')*

In [2]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without a CUDA device instead of failing at the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 100
m = batch_size

# Folder holding the MNIST files. Should be modified by user.
mnist_root = '/home/buyun/Documents/GitHub/NCVX/examples/data/mnist'

# download=False: the training files are expected to already exist under
# `mnist_root`.
train_dataset = datasets.MNIST(
    root=mnist_root,
    train=True,
    transform=ToTensor(),
    download=False,
)
test_dataset = datasets.MNIST(
    root=mnist_root,
    train=False,
    transform=ToTensor(),
)

train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)

# Draw one shuffled mini-batch, flatten each 28x28 image into a 784-vector and
# move everything to `device`; images are cast to double precision.
inputs, labels = next(iter(train_loader))
inputs = inputs.reshape(-1, 28 * 28).to(device=device, dtype=torch.double)
labels = labels.to(device=device)

class LogisticRegression(torch.nn.Module):
    """Single linear layer that maps inputs to raw class scores.

    The layer is created in double precision on the module-level ``device``.
    No softmax is applied here; ``forward`` returns the affine output as is.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        linear_layer = torch.nn.Linear(input_dim, output_dim)
        self.linear = linear_layer.to(device=device, dtype=torch.double)

    def forward(self, x):
        """Return the linear layer's output for ``x`` as a double tensor on ``device``."""
        scores = self.linear(x)
        return scores.to(device=device, dtype=torch.double)
    
# One input feature per pixel of a flattened 28x28 image, one output score per class.
input_dim = 28 * 28
output_dim = 10
model = LogisticRegression(input_dim=input_dim, output_dim=output_dim)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


## Problem Definition

Specify optimization variables, and objective and constraint(s).

Note: please strictly follow the format of comb_fn, which will be used in the NCVX main algorithm.


In [3]:
# Optimization variables: one entry per model parameter, keyed "x0", "x1", ...
# in the order returned by model.parameters(), mapped to that parameter's shape.
var_str = "x"
var_in = {
    var_str + str(index): list(param.shape)
    for index, param in enumerate(model.parameters())
}
var_count = len(var_in)

# Weight of the L1 penalty added to the cross-entropy loss.
lambda_r = 0.001

def obj_eval_fn(X_struct):
    """Evaluate the objective at the point stored in ``X_struct``.

    The attributes ``x0``, ``x1``, ... of ``X_struct`` are marked as requiring
    gradients and installed as the data of the model parameters (in
    ``model.parameters()`` order). The returned value is the cross-entropy of
    ``model(inputs)`` against ``labels`` plus ``lambda_r`` times the 1-norm of
    ``X_struct.x0``.
    """
    for index, param in enumerate(model.parameters()):
        candidate = getattr(X_struct, "x" + str(index))
        candidate.requires_grad_(True)
        param.data = candidate  # update model parameters

    logits = model(inputs)
    data_loss = nn.CrossEntropyLoss()(logits, labels)
    return data_loss + lambda_r * torch.norm(X_struct.x0, p=1)
    

def comb_fn(X_struct):
    """Return ``[f, ci, ce]`` for NCVX at the point stored in ``X_struct``.

    ``f`` is the cross-entropy of ``model(inputs)`` against ``labels`` plus
    ``lambda_r`` times the 1-norm of ``X_struct.x0``. Both constraint slots
    (``ci`` and ``ce``) are ``None``, i.e. this problem sets no constraints.
    """
    # Install the current iterate (x0, x1, ...) as the model parameters.
    prefix = "x"
    for position, param in enumerate(model.parameters()):
        iterate = getattr(X_struct, prefix + str(position))
        iterate.requires_grad_(True)
        param.data = iterate

    weights = X_struct.x0
    loss_fn = nn.CrossEntropyLoss()
    predictions = model(inputs)
    f = loss_fn(predictions, labels) + lambda_r * torch.norm(weights, p=1)
    return [f, None, None]

## User Options
Specify user-defined options for NCVX

In [4]:
opts = Options()
nvar = getNvarTorch(model.parameters())

# Starting point: the model's current parameters flattened into an (nvar, 1) column.
initial_point = torch.nn.utils.parameters_to_vector(model.parameters()).detach().reshape(nvar, 1)

# User options, applied below in the listed order by plain attribute assignment.
user_settings = {
    'QPsolver': 'osqp',
    'maxit': 3000,
    'x0': initial_point,
    'opt_tol': 1e-6,
    'fvalquit': 1e-6,
    'print_level': 1,
    'print_frequency': 100,
    # 'print_ascii': True,
    'halt_on_linesearch_bracket': False,
    'max_fallback_level': 3,
    'min_fallback_level': 2,
    'init_step_size': 1e-2,
    'linesearch_maxit': 25,
    'is_backtrack_linesearch': True,
    'searching_direction_rescaling': True,
    'disable_terminationcode_6': True,
}
for option_name, option_value in user_settings.items():
    setattr(opts, option_name, option_value)

## Initial Test 
Check initial accuracy of the defined model

In [5]:
# Fraction of the training mini-batch classified correctly before optimization.
outputs = model(inputs)
_, predicted = outputs.max(1)  # index of the largest score in each row
acc = (predicted == labels).sum().item() / labels.size(0)

print("Initial acc = {}".format(acc))

Initial acc = 0.12


## Main Algorithm

In [6]:
# Run NCVX on the problem defined above and report the elapsed wall-clock time.
start = time.time()
soln = ncvx(
    combinedFunction=comb_fn,
    objEvalFunction=obj_eval_fn,
    var_dim_map=var_in,
    nn_model=model,
    torch_device=device,
    user_opts=opts,
)
end = time.time()
elapsed = end - start
print("Total Wall Time: {}s".format(elapsed))



╔═════ QP SOLVER NOTICE ════════════════════════════════════════════════════════════════════╗
║  NCVX requires a quadratic program (QP) solver that has a quadprog-compatible interface,  ║
║  the default is osqp. Users may provide their own wrapper for the QP solver.              ║
║  To disable this notice, set opts.quadprog_info_msg = False                               ║
╚═══════════════════════════════════════════════════════════════════════════════════════════╝
══════════════════════════════════════════════════════════════════════════════════════════════╗
NCVX: A User-Friendly and Scalable Package for Nonconvex Optimization in Machine Learning     ║ 
Version 1.1.1                                                                                 ║ 
MIT License Copyright (c) 2021 SUN Group @ UMN                                                ║ 
══════════════════════════════════════════════════════════════════════════════════════════════╣


## Train Accuracy

In [7]:
# Copy the solution vector returned by NCVX into the model parameters, then
# measure accuracy on the training mini-batch.
torch.nn.utils.vector_to_parameters(soln.final.x, model.parameters())
outputs = model(inputs)
_, predicted = outputs.max(1)  # index of the largest score in each row
acc = (predicted == labels).sum().item() / labels.size(0)
print("Train acc = {}".format(acc))

Train acc = 1.0


## Test Accuracy

In [8]:
# Take one mini-batch from the test loader, flatten each image to a 784-vector
# and move it to `device` (images cast to double precision).
test_inputs, test_labels = next(iter(test_loader))
test_inputs = test_inputs.reshape(-1, 28 * 28).to(device=device, dtype=torch.double)
test_labels = test_labels.to(device=device)

# Fraction of this test mini-batch classified correctly.
test_outputs = model(test_inputs)
_, test_predicted = test_outputs.max(1)
test_acc = (test_predicted == test_labels).sum().item() / test_labels.size(0)
print("Test acc = {}".format(test_acc))

Test acc = 0.74
