## Explore sparse rank approximations

The idea is that taking the best rank-1 approximation via the SVD can be viewed as solving the optimization problem


$$\max_{v} \|Av \|_2\qquad  \text{subject to: } \|v\|_2 = 1$$

To make this sparse, we want to minimize the $\|x\|_1$ norm instead.
But we want to do this on both sides, i.e. for the left and the right vector at once.

Therefore we use the SVD $A = U S V^\top$ to optimize both sides at the same time.

$$v = VS^{-1}x$$

$$u = Ux$$

so that $Av = USV^\top V S^{-1} x = Ux = u$. We solve the optimization problem

$$\min_{x} \|VS^{-1}x \|_1+\|Ux \|_1\qquad  \text{subject to: } \|x\|_2 = 1$$

This can be done using manifold optimization on the unit sphere.

In [None]:
import numpy as np
import matplotlib.pylab as plt
import scipy.optimize as spopt

In [None]:
def g(a):
    """Objective value at the coefficient vector ``a``.

    Returns the l1 norm of ``V @ (Sinv * a)`` plus the l1 norm of ``U @ a``.
    ``U``, ``V`` and ``Sinv`` are read from module scope; ``Sinv`` holds a
    diagonal matrix as a vector, so it is applied elementwise.
    """
    scaled = Sinv * a
    right_term = np.linalg.norm(V @ scaled, ord=1)
    left_term = np.linalg.norm(U @ a, ord=1)
    return right_term + left_term

In [None]:
# Small hand-checkable example. With Sinv = (1, 1) and a = e_0 the objective is
# the sum of absolute values of the first column of V (1+3+4) plus that of the
# first column of U (1+3).
U = np.array([[1,2],[3,4]])
V = np.array([[1,2],[3,4],[4,5]])
Sinv = np.array([1,1])

# Difference to the hand-computed value; the displayed result should be 0.
g(np.array([1,0]))-(1+3+1+3+4)

In [None]:
def gradg(a):
    """(Sub)gradient of ``g`` at ``a``.

    Each l1 term contributes the transposed matrix applied to the elementwise
    sign of its argument; the ``V`` term is additionally scaled by ``Sinv``
    (a diagonal matrix stored as a vector). ``U``, ``V`` and ``Sinv`` are read
    from module scope.
    """
    sign_right = np.sign(V @ (Sinv * a))
    sign_left = np.sign(U @ a)
    grad_right = Sinv * (V.T @ sign_right)
    grad_left = U.T @ sign_left
    return grad_right + grad_left

In [None]:
# Finite-difference gradient of g at a sample point, for comparison with gradg.
# Call the public scipy.optimize.approx_fprime: the scipy.optimize.optimize
# submodule used before is a deprecated namespace.
a = np.array([0,5])
spopt.approx_fprime(a,g,1e-3)

In [None]:
# Analytic gradient at the first unit vector (displayed as the cell output).
gradg(np.array([1,0]))

In [None]:
# Problem size: A is a dense n-by-m test matrix with entries drawn by
# np.random.rand (uniform on [0, 1)).
n, m = 100, 200

A = np.random.rand(n, m)
# Tiny rank-1 alternative for debugging:
# A = np.array([[1,0],[1,0],[1,0]])

In [None]:
# Thin SVD A = u @ diag(s) @ vh; publish the factors that g and gradg read.
u, s, vh = np.linalg.svd(A, full_matrices=False)
U, V = u, vh.T
# NOTE: something is needed here if a singular value is close to 0 (1/s blows
# up); eventually reduce the dimension of a.
Sinv = 1.0 / s

In [None]:
# More testing: evaluate g on coordinate directions of the singular basis.
a = np.zeros_like(s)
a[0] = 1 # index 0 is the largest singular value -> small value of g
display(g(a))
a[-1] = 1 # index -1 is the smallest singular value -> big g (a[0] is still 1 here)
display(g(a))

In [None]:
# Gradient check: compare the analytic gradient with forward finite
# differences. Uses the public scipy.optimize.approx_fprime; the
# scipy.optimize.optimize submodule used before is a deprecated namespace.
a = np.zeros_like(s)
a[10] = 1
#display(gradg(a))
#display(spopt.approx_fprime(a,g,1e-8))
# Largest deviation between analytic and numerical gradient ...
display(np.max(np.abs(gradg(a)-spopt.approx_fprime(a,g,1e-8))))
# ... and the gradient magnitude, as a scale for that deviation.
display(np.max(np.abs(gradg(a))))

In [None]:
#Projector
def proj(x, h):
    """Remove from ``h`` its component along ``x``.

    Returns ``h - x * (x @ h)``. For a unit vector ``x`` this is the
    orthogonal projection of ``h`` onto the tangent space of the unit sphere
    at ``x`` (``x`` is not normalised here).

    The parentheses matter: ``*`` and ``@`` have equal precedence and group
    left to right, so ``x*x@h`` evaluates ``(x*x) @ h`` -- a scalar that would
    then be subtracted from every entry of ``h``.
    """
    return h - x * (x @ h)

In [None]:
# Projected gradient at the first unit vector.
a = np.zeros_like(s)
a[0] = 1 # This is a possible initial value, check the results
proj(a,gradg(a))

In [None]:
def gamma(x, h, t): #geodesic
    """Evaluate ``x*cos(t*|h|) + h/|h| * sin(t*|h|)``.

    When ``x`` is a unit vector and ``h`` is orthogonal to it, this is the
    great circle through ``x`` with initial velocity ``h`` and the result has
    unit norm; neither condition is checked here.

    A zero direction ``h`` would divide by zero and yield NaNs. The curve is
    constant in that case, so ``x`` is returned (as a new array promoted to
    float).
    """
    l_h = np.linalg.norm(h)
    if l_h == 0:
        return np.asarray(x) * 1.0
    return x * np.cos(t * l_h) + h * (np.sin(t * l_h) / l_h)

In [None]:
# Check that the curve stays on the unit sphere: a is a unit vector and h is a
# unit vector orthogonal to it, so the displayed norm should be 1.
a = np.zeros_like(s)
a[0] = 1
h = np.zeros_like(s)
h[2] = 1
np.linalg.norm(gamma(a,h,0.1))

In [None]:
# Maximum number of descent iterations and zero-initialised history buffers
# with one row / entry per iteration.
N = 500
As = np.zeros((N,a.size))  # sized for vectors of the same length as a
Gs = np.zeros((N,a.size))  # same shape as As
vals = np.zeros(N)         # one scalar per iteration

In [None]:
# Initialise a: start from the first unit vector (largest singular value ->
# small objective value), add a random non-negative perturbation and
# renormalise to unit length.
a = np.zeros_like(s)
a[0] = 1
a += 0.7 * np.random.rand(s.size)
a /= np.linalg.norm(a)


In [None]:
# Projected gradient descent with a backtracking line search.
# Gs, As and vals record the gradient, the accepted iterate and its objective
# value per outer iteration; rows after an early stop keep their initial zeros.
val = g(a)
for i in range(N):
    gr = gradg(a)
    #gr = spopt.approx_fprime(a,g,1e-10)
    Gs[i,:]=gr
    # The search direction does not change during the line search.
    direction = -proj(a,gr)
    # `k` instead of `n`, so the global matrix dimension n is not overwritten.
    for k in range(30):
        # Step sizes 0.2, 0.2/1.5, 0.2/1.5**2, ...
        a_int = gamma(a,direction,2e-1/(1.5**k))
        val_int = g(a_int)  # evaluated once and reused below
        if val_int<val:
            a = a_int
            val = val_int
            print(k)
            break
    else:
        # None of the 30 step sizes decreased g: stop the descent.
        print('no improvement at i=',i)
        break
    As[i,:]=a
    vals[i]= val
    print(val)

In [None]:
# Objective value per iteration, as recorded in vals.
plt.plot(vals)

In [None]:
def get_rank1(A):
    """Search for a pair ``(u, v)`` with small l1 norms, built from the SVD of ``A``.

    The SVD of ``A`` is truncated to singular values above ``1e-5`` times the
    largest one, and ``g`` is then decreased by projected gradient steps with
    a backtracking line search, starting from randomly perturbed versions of
    the first unit vector (up to 30 restarts, at most ``N`` steps each).

    Side effects: overwrites the module-level ``U``, ``V`` and ``Sinv`` (read
    by ``g`` and ``gradg``), reads the module-level ``N``, prints progress and
    calls ``display``.

    Parameters:
        A: 2-D array to analyse.

    Returns:
        The list ``[U @ a, V @ (Sinv * a)]`` for the final coefficient vector
        ``a``. If no restart beats the reference value ``g(e_0)``, the result
        of the last restart is returned anyway (a message is printed).

    Raises:
        ValueError: if no singular value passes the truncation threshold
            (e.g. ``A`` is the zero matrix).
    """
    global U
    global V
    global Sinv

    u, s, vh = np.linalg.svd(A, full_matrices=False)

    # Keep only directions whose singular value is not negligible relative to
    # the largest one, so that 1/s stays bounded.
    rank = np.count_nonzero(s>1e-5*s[0])
    if rank == 0:
        raise ValueError('get_rank1: A has no nonzero singular values')
    print(rank)
    s = s[0:rank]
    U = u[:,0:rank]
    V = vh.T[:,0:rank]

    Sinv = 1/s
    print('Max_sinv')
    display(max(Sinv))

    # Initialise a with the first unit vector (largest singular value).
    a = np.zeros_like(s)
    a[0]=1
    valref = g(a) #get the reference value
    for _ in range(30):
        a = a+0.5*np.random.rand(s.size) #perturb it to move out of local minimum?
        a = a/np.linalg.norm(a)
        val = g(a)
        for i in range(N):
            gr = gradg(a)
            # The search direction does not change during the line search.
            direction = -proj(a,gr)
            for k in range(30):
                a_int = gamma(a,direction,2e-1/(1.5**k))
                val_int = g(a_int)  # evaluated once and reused below
                if val_int<val:
                    a = a_int
                    val = val_int
                    break
            else:
                # None of the 30 step sizes decreased g: this restart is done.
                print('no improvement at i=',i)
                break

        if val<valref:
            break
    else:
        print('no better start a_0 found')
    # val always equals g(a) at this point.
    print('Final g(a):',val)
    print('val/valref',val/valref)
    return [U@a,V@(Sinv*a)]

In [None]:
# Sparse left/right vector pair for A.
u, v = get_rank1(A)

In [None]:
# Largest absolute entry of A @ v - u.
np.abs(A @ v - u).max()

In [None]:
# Visualise how sparse v is: magnitudes relative to the largest magnitude.
vec = v
# Normalise by the largest magnitude rather than the largest signed value, so
# the plotted |vec| lies in [0, 1] even when the dominant entry is negative.
vec = vec/np.max(np.abs(vec))
fig, axs = plt.subplots(1, 2, sharey=True, tight_layout=True)

# Left: magnitude of each entry against its (1-based) index.
axs[0].scatter(abs(vec),np.arange(1,vec.size+1))
# Right: histogram of the magnitudes.
axs[1].hist(abs(vec),bins = 50);


In [None]:
def approx(Us, a, Vs):
    """Return ``Us @ diag(a) @ Vs.T``, i.e. ``sum_k a[k] * outer(Us[:, k], Vs[:, k])``.

    Parameters:
        Us: matrix whose columns are the u vectors.
        a:  vector of factors, one per column.
        Vs: matrix whose columns are the v vectors.

    The columns of ``Us`` are scaled by ``a`` through broadcasting instead of
    multiplying with a dense ``len(a) x len(a)`` diagonal matrix.
    """
    scaled_us = np.asarray(Us) * np.asarray(a)
    return scaled_us @ np.asarray(Vs).T

In [None]:
# Sanity check of approx: the thin SVD factors of A should reproduce A, so the
# displayed maximum absolute deviation should be at round-off level.
u, s, vh = np.linalg.svd(A, full_matrices=False)
np.abs(A - approx(u, s, vh.T)).max()

In [None]:
def inner(A, u, v):
    """Inner product of ``A`` with the rank-one matrix ``u v^T``.

    Equivalent to the dot product of the flattened ``A`` with the flattened
    outer product of ``u`` and ``v``.
    """
    rank_one = np.outer(u, v)
    return np.sum(A * rank_one)

In [None]:
# Quick checks on the 3x3 identity: <I, e1 e1^T> should be 1 and
# <I, e0 e1^T> should be 0.
display(inner(np.eye(3),np.array([0,1,0]),np.array([0,1,0])))
display(inner(np.eye(3),np.array([1,0,0]),np.array([0,1,0])))

In [None]:
def matrixnorm(u, v):
    """Norm (``np.linalg.norm`` default) of the rank-one matrix ``u v^T``."""
    column = u.reshape(-1, 1)
    row = v.reshape(1, -1)
    return np.linalg.norm(column @ row)
    

In [None]:
def calc_new_atom(R, rel_tol=0.005):
    """Compute one sparse rank-one atom for the residual ``R``.

    Obtains ``(u, v)`` from ``get_rank1(R)``, zeroes entries that are small
    in magnitude, rescales ``u`` so that ``u v^T`` has unit ``matrixnorm``,
    and takes the coefficient ``s`` as the inner product of ``R`` with
    ``u v^T``.

    Parameters:
        R: the current residual matrix.
        rel_tol: entries with magnitude below ``rel_tol`` times the largest
            magnitude of their vector are set to zero.

    Returns:
        The tuple ``(u, s, v)``. If the pruned pair has zero norm, the
        coefficient is ``0.0`` and ``u`` is left unscaled.
    """
    [u, v] = get_rank1(R)

    # Set small values to 0. The comparison is on magnitudes: comparing signed
    # values would zero every negative entry, however large.
    u[np.abs(u) < rel_tol*np.max(np.abs(u))] = 0
    v[np.abs(v) < rel_tol*np.max(np.abs(v))] = 0

    norm = matrixnorm(u, v)
    if norm == 0:
        # Nothing left to normalise; avoid dividing by zero.
        return u, 0.0, v
    u = u/norm
    s = inner(R, u, v)

    return u, s, v

In [None]:
# Setup for the greedy decomposition of a Gaussian random n-by-m matrix into
# d rank-one atoms: columns of Us / Vs hold the vectors, a the coefficients.
i = 0  # index for array
n, m = 70, 60
A = np.random.standard_normal((n, m))
d = 100  # number of matrices
Us, Vs = np.zeros((n, d)), np.zeros((m, d))
a = np.zeros(d)

In [None]:
# Greedy decomposition: in each step, fit one new atom to the current residual
# and store it in column i of Us / Vs with coefficient a[i].
for i in range(d):
    A_tilde = approx(Us,a,Vs)
    R = A-A_tilde
    # Only the residual norm is reported; printing the function object
    # `gamma` here carried no information.
    print('||R||=',np.linalg.norm(R))
    u,s,v = calc_new_atom(R)
    Us[:,i]=u
    Vs[:,i]=v
    a[i]=s

# Refresh the approximation so that A_tilde also contains the last atom.
A_tilde = approx(Us,a,Vs)


In [None]:
def number_of_params(Qs):
    """Count the nonzero elements in each column of ``Qs``.

    Returns a vector with one count per column (the columns being the
    individual vectors).
    """
    is_nonzero = np.asarray(Qs) != 0
    return np.sum(is_nonzero, axis=0)

In [None]:
# Nonzero count per column (atom) of the left factors Us and right factors Vs.
plt.plot(number_of_params(Us),'x')
plt.plot(number_of_params(Vs),'x')

In [None]:
# Total number of nonzero entries over all columns of Us.
np.count_nonzero(Us)

In [None]:
# Total number of nonzero entries over all columns of Vs.
np.count_nonzero(Vs)

In [None]:
# Largest absolute entrywise error of the final approximation. The
# approximation is rebuilt from all stored atoms, and the error is measured in
# magnitude (a signed max would ignore large negative deviations).
np.max(np.abs(A-approx(Us,a,Vs)))