In [22]:
from sys import getsizeof
import numpy as np
import matplotlib.pyplot as plt
import torch
from adopty.lista import Lista
from adopty.datasets import make_coding
from adopty.loss_and_gradient import cost_lasso


from itertools import combinations, permutations
from copy import deepcopy
from pympler.asizeof import asizeof

# Pick a new seed in [0, 1000) on every run; it is printed so that a given run
# can be replayed by hard-coding the value.
seed = np.random.randint(1000)
print(seed)
rng = np.random.RandomState(seed=seed)

773


In [23]:
def loss_lasso(z, x, D, reg):
    """Row-wise Lasso objective ``0.5 * ||z D - x||^2 + reg * ||z||_1``.

    Parameters
    ----------
    z : array
        Codes, one per row; ``np.dot(z, D)`` must have the same shape as ``x``.
    x : array
        Signals, one per row.
    D : array
        Matrix mapping codes to signals.
    reg : float
        Weight of the l1 penalty.

    Returns
    -------
    array
        One loss value per row (both terms are reduced along axis 1).
    """
    residual = np.dot(z, D) - x
    data_fit = 0.5 * np.square(residual).sum(axis=1)
    penalty = reg * np.abs(z).sum(axis=1)
    return data_fit + penalty


In [24]:
# Sizes handed to make_coding, and the l1 regularisation weight.
n_dim, n_atoms, n_s = 2, 5, 5000
reg = 0.5

x, D, z = make_coding(n_s, n_atoms, n_dim, rng)
# Squared spectral norm of D; later cells scale weights and thresholds by 1 / L.
L = np.linalg.norm(D, ord=2) ** 2

In [25]:
def spca_bourin(A, k_=None):
    """Brute-force the largest spectral norm over principal submatrices of A.

    For every subset size ``i = 1 .. k_`` all index subsets ``S`` of that size
    are enumerated and the spectral norm of ``A[S, :][:, S]`` is computed; the
    largest value found for each size is recorded.

    Parameters
    ----------
    A : 2-D array
        Matrix whose principal submatrices are scanned. The index set is
        ``range(A.shape[0])`` and is applied to both axes.
    k_ : int or None
        Largest subset size to consider. Defaults to the number of rows of A.

    Returns
    -------
    list
        Entry ``i - 1`` is the maximum over subsets of size ``i``. A size
        larger than the number of rows has no subsets and yields ``0.``.

    Notes
    -----
    The cost grows combinatorially with the number of rows, so this is only
    usable for small matrices.
    """
    k, _ = A.shape
    if k_ is None:
        k_ = k
    l_list = []
    for size in range(1, k_ + 1):
        l_max = 0.
        # Reordering the indices of a principal submatrix permutes its rows and
        # columns simultaneously, which leaves the spectral norm unchanged, so
        # unordered subsets cover every case that ordered tuples would.
        for idx in combinations(range(k), size):
            B = A[idx, :][:, idx]
            l_max = max(l_max, np.linalg.norm(B, ord=2))
        l_list.append(l_max)
    return l_list

In [26]:
# Per-size maxima of the spectral norm over principal submatrices of D D^T,
# for subset sizes 1 .. n_atoms.
step_list = spca_bourin(np.dot(D, D.T), k_=n_atoms)

In [27]:
# Show the per-size values next to L for comparison.
print(step_list, L)

[1.0000000000000002, 1.999966830928037, 2.964472913372718, 3.8808372097944384, 3.881130864947346] 3.8811308649473455


In [28]:
# Reference codes from a 1000-layer network built straight from D (no fit),
# and their per-sample Lasso loss. Later cells report losses relative to l_star.
z_star = Lista(D, 1000).transform(x, reg)
l_star = loss_lasso(z=z_star, x=x, D=D, reg=reg)

In [32]:
# Mean loss gap to the reference solution for an un-fitted network (`ista`) and
# a trained one (`lista`), for every depth listed in `layers`.
# `ista`, `lista` and `n_layers` are reused by later cells with the values left
# by the last iteration.
layers = [30]
# layers = [2]
l_ista, l_lista = [], []
for n_layers in layers:
    # Network used as built, without calling fit.
    ista = Lista(D, n_layers)
    z_ista = ista.transform(x, reg)
    loss_ista = loss_lasso(z_ista, x, D, reg)

    # Network trained on x.
    lista = Lista(
        D,
        n_layers,
        parametrization='coupled',
        learn_threshold=True,
        max_iter=500,
    ).fit(x, reg)
    z_lista = lista.transform(x, reg)
    loss_lista = loss_lasso(z_lista, x, D, reg)

    # Recorded only once both networks have been evaluated.
    l_ista.append(np.mean(loss_ista - l_star))
    l_lista.append(np.mean(loss_lista - l_star))

l_ista = np.asarray(l_ista)
l_lista = np.asarray(l_lista)

Fitting model (layer 17/30):  20.20%

KeyboardInterrupt: 

In [None]:
# Per-layer diagnostics of the trained network, compared with the dictionary D:
#   stp   - median of the element-wise ratio W.T / D, rescaled by L
#   cs    - cosine similarity between the flattened W.T and D
#   tresh - the layer's second parameter (presumably the learned threshold),
#           rescaled by L
# `lista` and `n_layers` are the values left behind by the training cell.
lista_params = list(lista.parameters())  # built once instead of twice per layer
cs_list = []
stp_list = []
tresh_list = []
for layer in range(n_layers):
    # Parameters are read in pairs: index 2 * layer, then 2 * layer + 1.
    W = lista_params[2 * layer].detach().numpy()
    cs = np.dot(W.T.ravel(), D.ravel()) / np.sqrt(W.ravel().dot(W.ravel()) * D.ravel().dot(D.ravel()))
    cs_list.append(cs)
    stp = np.median(W.T / D * L)
    stp_list.append(stp)
    tresh = lista_params[2 * layer + 1].detach().numpy() * L
    tresh_list.append(tresh)
plt.plot(stp_list, label='median(W.T / D) * L')
plt.plot(cs_list, label='cosine similarity of W.T and D')
plt.plot(tresh_list, label='second parameter * L')
plt.hlines(1, 0, n_layers, linestyles='--')
# step_list[-3] is the entry for subsets of size len(step_list) - 2.
plt.hlines(L / step_list[-3], 0, n_layers, linestyles='-')
plt.xlabel('layer')
plt.legend()

In [None]:
# Baseline network: copy the trained model and overwrite every layer from
# layer_start up to n_layers - 1 with weights D.T / L and second parameter 1 / L.
# NOTE(review): presumably this is the un-learned ISTA parametrization - confirm
# against Lista.
layer_start = 20
lista_dumb = deepcopy(lista)
for layer in range(layer_start, n_layers):
    parameters_layer = lista_dumb.params[layer]
    parameters_layer[0].data = torch.from_numpy(D.T / L)
    parameters_layer[1].data = torch.tensor([1. / L]).double()
# NOTE(review): this summary uses the median, whereas the per-layer loss curves
# computed further down use the mean.
loss_dumb = np.median(loss_lasso(lista_dumb.transform(x, reg), x, D, reg) - l_star)

In [None]:
# Variant of the trained network in which every layer from layer_start onward is
# rewritten with a constant L_better picked from step_list according to the
# largest number of non-zero entries per row observed at that layer.
lista_hack = deepcopy(lista)
s_change = []
# Largest per-row count of non-zero entries in the output at layer_start.
s_old = np.max(np.sum(lista_hack.transform(x, reg, output_layer=layer_start) != 0, axis=1))
print(s_old)
for layer in range(layer_start, n_layers):
    # Evaluated on lista_hack as modified so far, so the iteration order matters.
    s = np.max(np.sum(lista_hack.transform(x, reg, output_layer=layer) != 0, axis=1))
    # Remember the layers at which the count changes (drawn as vertical lines later).
    if s != s_old:
        s_change.append(layer)
    s_old = s
    # NOTE(review): step_list[i] holds the value for subsets of size i + 1, so
    # index s - 2 selects the entry for size s - 1. For s <= 1 the index is
    # negative and wraps around to the end of the list - confirm this is intended.
    L_better = step_list[s-2]
    parameters_layer = lista_hack.params[layer]
    parameters_layer[0].data = torch.from_numpy(D.T / L_better)
    parameters_layer[1].data = torch.tensor([1. / L_better]).double()
    #parameters_layer[0].data *= L / L_better
    #parameters_layer[1].data *= L / L_better
loss_hack = np.median(loss_lasso(lista_hack.transform(x, reg), x, D, reg) - l_star)

In [None]:
# Ratio between L and the last L_better assigned by the preceding loop.
L / L_better

In [None]:
# Mean loss gap to the reference solution at each intermediate output layer,
# for every network under comparison.
networks = [ista, lista, lista_hack, lista_dumb]
names = ['ista', 'lista', 'hack', 'dumb']

losses = {name: [] for name in names}
# NOTE(review): the range stops at n_layers - 1; confirm whether the output of
# the full-depth network should be included as well.
layers_ = range(1, n_layers)
for layer in layers_:
    for network, name in zip(networks, names):
        # Named z_layer so that the `z` returned by make_coding is not overwritten.
        z_layer = network.transform(x, reg, output_layer=layer)
        loss_network = loss_lasso(z_layer, x, D, reg)
        losses[name].append(np.mean(loss_network - l_star))


In [None]:
# One curve per network: mean loss gap to the reference solution against the
# output layer, on a logarithmic y-axis.
for name in names:
    plt.semilogy(layers_, losses[name], label=name)
# Vertical lines at the layers recorded in s_change, spanning the current y-range.
plt.vlines(s_change, *plt.ylim())
plt.xlabel('output layer')
plt.ylabel('mean(loss - l_star)')
plt.legend()
plt.show()

In [15]:
# Last two parameters of the trained network.
list(lista.parameters())[-2:]

[Parameter containing:
 tensor([[-0.2669,  0.2306,  0.2766,  0.0017,  0.2834],
         [ 0.0991,  0.1667,  0.0650, -0.2841, -0.0210]], dtype=torch.float64,
        requires_grad=True), Parameter containing:
 tensor(0.2844, dtype=torch.float64, requires_grad=True)]

In [16]:
# Inspect the per-layer values collected in tresh_list.
tresh_list

[5.368208566290194,
 2.4590897240068075,
 1.4390737406032188,
 1.4093806336589794,
 1.2219765438291368,
 1.1712411427672254,
 1.1314893767022596,
 1.0816979655763257,
 1.1171617453680334,
 1.0882991561115798,
 1.0688079909825037,
 1.05475627463895,
 1.0423246444240015,
 1.029618265504395,
 1.0171097989063607,
 1.0101421248488287,
 1.0101866370817438,
 1.0097508310568604,
 1.0085179971329419,
 1.0076209829390907,
 1.0065139006336663,
 1.005760800030528,
 1.0047876981954391,
 1.003743267602735,
 1.0034854614646633,
 1.002344589316091,
 1.001356177490925,
 1.0007461006903706,
 1.0006658916671207,
 1.00049599435775]

In [17]:
# Recompute L, then show the element-wise ratio W.T / D scaled by L.
# NOTE(review): W is whatever the last run of the diagnostics loop left behind
# (the weights of its final layer), not a value defined in this cell.
L = np.linalg.norm(D, ord=2) ** 2
W.T / D * L

array([[1.00196853, 0.99818494],
       [1.00119368, 1.00069622],
       [0.9998827 , 0.99746835],
       [0.98217105, 0.99941302],
       [0.99994973, 0.97097629]])

In [18]:
# NOTE(review): `d` is not defined anywhere in this notebook and the recorded
# output is a NameError; this looks like a stray keystroke - delete the cell.
d

NameError: name 'd' is not defined

In [None]:
# Last parameter of the modified network, then of the trained one, one per line.
print(list(lista_hack.parameters())[-1], list(lista.parameters())[-1], sep='\n')

In [None]:
# Per-layer diagnostics of the trained network, compared with the dictionary D:
#   stp   - median of the element-wise ratio W.T / D, rescaled by L
#   cs    - cosine similarity between the flattened W.T and D
#   tresh - the layer's second parameter (presumably the learned threshold),
#           rescaled by L
# NOTE(review): this repeats an earlier diagnostics cell almost verbatim;
# consider moving the computation into a shared function.
lista_params = list(lista.parameters())  # built once instead of twice per layer
cs_list = []
stp_list = []
tresh_list = []
for layer in range(n_layers):
    # Parameters are read in pairs: index 2 * layer, then 2 * layer + 1.
    W = lista_params[2 * layer].detach().numpy()
    cs = np.dot(W.T.ravel(), D.ravel()) / np.sqrt(W.ravel().dot(W.ravel()) * D.ravel().dot(D.ravel()))
    cs_list.append(cs)
    stp = np.median(W.T / D * L)
    stp_list.append(stp)
    tresh = lista_params[2 * layer + 1].detach().numpy() * L
    tresh_list.append(tresh)
plt.plot(stp_list, label='median(W.T / D) * L')
plt.plot(cs_list, label='cosine similarity of W.T and D')
plt.plot(tresh_list, label='second parameter * L')
plt.hlines(1, 0, n_layers, linestyles='--')
plt.xlabel('layer')
plt.legend()

In [None]:
# Inspect the per-layer values collected in tresh_list.
tresh_list

In [None]:
# Mean loss gap of the un-fitted network, one entry per depth in `layers`.
l_ista