# Move Bounds

Notebook on moving the bounds of systems, while also calculating the $\sigma$s

In [None]:
from tvsclib.strict_system import StrictSystem
from tvsclib.stage import Stage
from tvsclib.system_identification_svd import SystemIdentificationSVD
from tvsclib.toeplitz_operator import ToeplitzOperator
from tvsclib.mixed_system import MixedSystem
import numpy as np
import scipy.linalg as linalg
import matplotlib.pyplot as plt
import scipy.linalg 
import scipy.stats 
import tvsclib.utils as utils
import tvsclib.math as math

from tvsclib.transformations.output_normal import OutputNormal
from tvsclib.transformations.input_normal import InputNormal
from tvsclib.transformations.reduction import Reduction
from tvsclib.approximation import Approximation


from structurednets.asset_helpers import get_animal_classes_filepath
from structurednets.extract_features import get_required_indices
from structurednets.models.visionmodel import VisionModel
from structurednets.models.alexnet import AlexNet
from structurednets.models.googlenet import GoogleNet
from structurednets.models.inceptionv3 import InceptionV3
from structurednets.models.mobilenetv2 import MobilenetV2
from structurednets.models.resnet18 import Resnet18
from structurednets.models.vgg16 import VGG16

import torchvision.models as models
import torch

In [None]:
import move

In [None]:
def plot_sigmas(sigmas_causal,sigmas_anticausal,axes=None):
    """Plot causal and anticausal singular values, one axes column per entry.

    Row 0 of the axes grid receives ``sigmas_causal[i]``, row 1 receives
    ``sigmas_anticausal[i]``. Each set is drawn with the singular values on
    the x-axis and their index on the y-axis, using ``'x'`` markers.

    Args:
        sigmas_causal: sequence of 1-D collections of singular values.
        sigmas_anticausal: sequence indexed in parallel with ``sigmas_causal``.
        axes: optional grid indexable as ``axes[row, col]``. If given, the
            values are drawn on top of whatever is already plotted there.

    Returns:
        The axes grid that was drawn on (the one passed in, or a new one).
    """
    if axes is None:
        # squeeze=False keeps the grid two-dimensional even for a single
        # column; without it axes[0, i] fails when there is only one entry.
        _, axes = plt.subplots(2, len(sigmas_causal), squeeze=False)
    for col, sig_causal in enumerate(sigmas_causal):
        axes[0, col].plot(sig_causal, np.arange(len(sig_causal)), 'x')
        axes[1, col].plot(sigmas_anticausal[col],
                          np.arange(len(sigmas_anticausal[col])), 'x')
    return axes

## Create a tailor-made matrix

Create matrices from combinations of orthogonal vectors, such that we can create rank-1 block matrices and combine these.

As we have a rank-1 matrix, all singular values except the largest are irrelevant. Therefore we use a cost function that penalizes all singular values that are smaller than a fraction of the largest singular value.

In [None]:
# Input / output sizes of the four stages of the hand-made matrix.
dims_in = 3*np.array([6, 3, 5, 2])
dims_out = 3*np.array([2, 5, 3, 6])

# Per stage, draw a random orthogonal matrix, keep three of its columns
# (rows on the input side) and scale them by the stage size.
# An earlier variant took these vectors from the SVD of a random matrix.
Us = np.vstack([scipy.stats.ortho_group.rvs(n_out)[:, :3]*n_out
                for n_out in dims_out])
Vts = np.hstack([scipy.stats.ortho_group.rvs(n_in)[:3, :]*n_in
                 for n_in in dims_in])

# Three rank-1 matrices, one per region of the block structure.
lower = Us[:, 0:1] @ Vts[0:1, :]
diag = Us[:, 1:2] @ Vts[1:2, :]
upper = Us[:, 2:3] @ Vts[2:3, :]

# Assemble block row by block row: columns left of the diagonal block come
# from `lower`, the diagonal block from `diag`, the remainder from `upper`.
matrix = np.zeros_like(diag)
row0 = col0 = 0
for n_rows, n_cols in zip(dims_out, dims_in):
    rows = slice(row0, row0 + n_rows)
    col1 = col0 + n_cols
    matrix[rows, :col0] = lower[rows, :col0]
    matrix[rows, col0:col1] = diag[rows, col0:col1]
    matrix[rows, col1:] = upper[rows, col1:]
    row0, col0 = row0 + n_rows, col1

plt.figure()
plt.matshow(matrix)

Create a system that is agnostic of the state boundaries but has the correct number of states.

In [None]:
# Start from a uniform segmentation: four stages, each with a quarter of the
# inputs / outputs, independent of the boundaries used to build the matrix.
n_stages_start = 4
dims_in_start = [sum(dims_in)//n_stages_start]*n_stages_start
dims_out_start = [sum(dims_out)//n_stages_start]*n_stages_start

# Identify a mixed system for this segmentation and display it.
T = ToeplitzOperator(matrix, dims_in_start, dims_out_start)
S = SystemIdentificationSVD(T, epsilon=1e-12)
system = MixedSystem(S)
utils.show_system(system)

In [None]:
def cost_error_cut_rel(s,shape,s_a,shape_a):
    """Size-normalised energy of the singular values below 0.9 times the largest.

    For the causal values ``s`` and the anticausal values ``s_a`` separately,
    the squares of all entries smaller than ``tau`` times the largest entry
    are summed and divided by ``shape[0]*shape[1]`` (resp. ``shape_a``).

    Returns:
        The sum of the causal and the anticausal term.
    """
    tau = 0.9
    small_causal = s[s < tau*np.max(s)]
    small_anticausal = s_a[s_a < tau*np.max(s_a)]
    cost_causal = np.sum(small_causal**2)/(shape[0]*shape[1])
    cost_anticausal = np.sum(small_anticausal**2)/(shape_a[0]*shape_a[1])
    return cost_causal + cost_anticausal


# Move the boundaries of the uniformly segmented system using the relative-cut
# cost. NOTE(review): the meaning of the positional 3 and of m_in / m_out is
# defined by move.move — confirm there.
sys_move, input_dims, output_dims = move.move(
    system, 3, cost_error_cut_rel, m_in=[4, 2, 1], m_out=[4, 2, 1]
)

utils.show_system(sys_move)

# Print the dims used to build the matrix, each followed by the moved dims.
for dims_ref, dims_moved in ((dims_in, sys_move.dims_in),
                             (dims_out, sys_move.dims_out)):
    print(dims_ref)
    print(np.array(dims_moved))


In [None]:
# Singular values of `matrix` for the segmentation dims_in / dims_out,
# plotted on a fresh axes grid (causal in row 0, anticausal in row 1).
sigmas_causal,sigmas_anticausal = math.extract_sigmas(matrix, dims_in,dims_out)
ax = plot_sigmas(sigmas_causal,sigmas_anticausal)

# System from random matrix

Here we will enforce a fixed number of states. When recovering the segmentation we will use a cost function that reflects this knowledge.

In [None]:
# Segmentation and identification threshold for the random test system.
dims_in = 3*np.array([6, 3, 5, 2])
dims_out = 3*np.array([2, 5, 3, 6])
eps_ident = 0.6

# Dense random matrix of matching total size.
matrix = np.random.rand(dims_out.sum(), dims_in.sum())

# Identify with at most three local states (instead of cutting at eps_ident).
T = ToeplitzOperator(matrix, dims_in, dims_out)
S = SystemIdentificationSVD(T, max_states_local=3)
system = MixedSystem(S)
print(system)

# From here on work with the matrix represented by the identified system.
matrix = system.to_matrix()
plt.matshow(matrix)

In [None]:
# Re-identify on a uniform segmentation: four stages, each with a quarter of
# the inputs / outputs.
n_stages_start = 4
dims_in_start = [sum(dims_in)//n_stages_start]*n_stages_start
dims_out_start = [sum(dims_out)//n_stages_start]*n_stages_start
T = ToeplitzOperator(matrix, dims_in_start, dims_out_start)
S = SystemIdentificationSVD(T, epsilon=1e-12)
system = MixedSystem(S)
utils.show_system(system)

# Module-level threshold read by the eps-based cost functions.
eps = eps_ident*0.9

def cost_error_cut_abs(s,shape,s_a,shape_a):
    """Size-normalised energy of the singular values below the global ``eps``.

    The squares of all entries of ``s`` (resp. ``s_a``) smaller than the
    module-level ``eps`` are summed and divided by ``shape[0]*shape[1]``
    (resp. ``shape_a[0]*shape_a[1]``); the two terms are added.
    """
    cut_causal = s[s < eps]
    cut_anticausal = s_a[s_a < eps]
    cost_causal = np.sum(cut_causal**2)/(shape[0]*shape[1])
    cost_anticausal = np.sum(cut_anticausal**2)/(shape_a[0]*shape_a[1])
    return cost_causal + cost_anticausal

def cost_error_cut_number(s,shape,s_a,shape_a):
    """Size-normalised energy of all singular values after the first three.

    Entries ``s[3:]`` and ``s_a[3:]`` are squared and summed, each sum is
    divided by the product of the two entries of the matching shape, and the
    two terms are added.
    """
    v = 3
    tail_causal = s[v:]
    tail_anticausal = s_a[v:]
    cost_causal = np.sum(tail_causal**2)/(shape[0]*shape[1])
    cost_anticausal = np.sum(tail_anticausal**2)/(shape_a[0]*shape_a[1])
    return cost_causal + cost_anticausal

# Move the boundaries using the cost that penalises everything beyond the
# first three singular values.
sys_move, input_dims, output_dims = move.move(
    system, 3, cost_error_cut_number, m_in=[4, 2, 1], m_out=[4, 2, 1]
)

utils.show_system(sys_move)

# Print the original dims, each followed by the moved dims.
for dims_ref, dims_moved in ((dims_in, sys_move.dims_in),
                             (dims_out, sys_move.dims_out)):
    print(dims_ref)
    print(np.array(dims_moved))

In [None]:
# Overlay the singular values for three segmentations of the same matrix:
# the original one, the uniform start and the moved result. The first pass
# creates the axes (axes=None), the later ones draw onto them.
ax = None
for seg_in, seg_out in ((dims_in, dims_out),
                        (dims_in_start, dims_out_start),
                        (sys_move.dims_in, sys_move.dims_out)):
    sigmas_causal, sigmas_anticausal = math.extract_sigmas(matrix, seg_in, seg_out)
    ax = plot_sigmas(sigmas_causal, sigmas_anticausal, axes=ax)

## Test Weight matrix

For this we introduce new cost functions:

In [None]:
def cost_sum(s,shape,s_a,shape_a):
    """Return the sum of all causal and all anticausal singular values.

    ``shape`` and ``shape_a`` are not used; the signature matches the other
    cost functions in this file.
    """
    total_causal = np.sum(s)
    total_anticausal = np.sum(s_a)
    return total_causal + total_anticausal

# Cost relative to the block size.
def cost_rel(s,shape,s_a,shape_a):
    """Return the squared singular values summed and divided by the block size.

    The causal term is ``sum(s**2)/(shape[0]*shape[1])``, the anticausal term
    is the same with ``s_a`` and ``shape_a``; both are added.
    """
    cost_causal = np.sum(s**2)/(shape[0]*shape[1])
    cost_anticausal = np.sum(s_a**2)/(shape_a[0]*shape_a[1])
    return cost_causal + cost_anticausal

# Scale the singular values by the largest singular value.
def cost_scaled(s,shape,s_a,shape_a):
    """Return the sum of the singular values after dividing by their maximum.

    ``s`` and ``s_a`` are each divided by their own largest entry before
    summing. ``shape`` and ``shape_a`` are not used; the signature matches the
    other cost functions in this file.
    """
    scaled_causal = s/np.max(s)
    scaled_anticausal = s_a/np.max(s_a)
    return np.sum(scaled_causal) + np.sum(scaled_anticausal)

# Cut relative to the largest singular value and rescale by the block size.
def cost_error_cut_rel(s,shape,s_a,shape_a):
    """Size-normalised energy of the singular values below half the largest one.

    For ``s`` and ``s_a`` separately, the squares of all entries smaller than
    ``tau`` times the largest entry are summed and divided by
    ``shape[0]*shape[1]`` (resp. ``shape_a``); the two terms are added.
    """
    tau = 0.5
    small_causal = s[s < tau*np.max(s)]
    small_anticausal = s_a[s_a < tau*np.max(s_a)]
    cost_causal = np.sum(small_causal**2)/(shape[0]*shape[1])
    cost_anticausal = np.sum(small_anticausal**2)/(shape_a[0]*shape_a[1])
    return cost_causal + cost_anticausal

#def cost_error_cut_abs(s,shape,s_a,shape_a):
#    c = np.sum(s[s<eps]**2)+np.sum(s_a[s_a<eps]**2)
#    return c

# Cut at the fixed threshold eps and rescale by the block size.
def cost_error_cut_abs(s,shape,s_a,shape_a):
    """Size-normalised energy of the singular values below the global ``eps``.

    Entries of ``s`` (resp. ``s_a``) smaller than the module-level ``eps`` are
    squared, summed and divided by ``shape[0]*shape[1]`` (resp. ``shape_a``);
    the two terms are added.
    """
    below_causal = s < eps
    below_anticausal = s_a < eps
    cost_causal = np.sum(s[below_causal]**2)/(shape[0]*shape[1])
    cost_anticausal = np.sum(s_a[below_anticausal]**2)/(shape_a[0]*shape_a[1])
    return cost_causal + cost_anticausal

We change the structure of the objective: instead of minimizing the approximation error without considering the cost, we now use a different strategy.
We now fix the $\epsilon$ and minimize the cost, and do not consider the approximation error.

For this we use a global cost function.

In [None]:
def cost_computation(sigmas_causal,sigmas_anticausal,dims_in,dims_out):
    """Global cost of a segmentation when cutting singular values at ``eps``.

    For each of the first ``len(dims_in)-1`` entries of ``sigmas_causal`` /
    ``sigmas_anticausal`` the number of values above the module-level ``eps``
    is used as a state dimension. The first and last state dimensions stay
    zero. The result is ``math.cost`` of the causal part plus ``math.cost`` of
    the anticausal part, the latter with ``include_D=False``.
    NOTE(review): what ``math.cost`` counts is defined in tvsclib — confirm there.
    """
    n_stages = len(dims_in)
    dims_state_causal = np.zeros(n_stages+1)
    dims_state_anticausal = np.zeros(n_stages+1)
    # Number of states per interior boundary: singular values above eps.
    dims_state_causal[1:n_stages] = [
        np.count_nonzero(sigmas_causal[j] > eps) for j in range(n_stages-1)
    ]
    dims_state_anticausal[1:n_stages] = [
        np.count_nonzero(sigmas_anticausal[j] > eps) for j in range(n_stages-1)
    ]

    cost_causal = math.cost(dims_in, dims_out, dims_state_causal, causal=True)
    cost_anticausal = math.cost(dims_in, dims_out, dims_state_anticausal,
                                causal=False, include_D=False)
    return cost_causal + cost_anticausal
        
    
def cost_computation_soft(sigmas_causal,sigmas_anticausal,dims_in,dims_out):
    """Softened variant of the global cost without a hard threshold.

    Instead of counting the singular values above the module-level ``eps``,
    every value contributes ``min(sigma/eps, 1)`` to the state dimension, so
    values below ``eps`` still count proportionally. The first and last state
    dimensions stay zero. The result is ``math.cost`` of the causal part plus
    ``math.cost`` of the anticausal part, the latter with ``include_D=False``.
    NOTE(review): what ``math.cost`` counts is defined in tvsclib — confirm there.
    """
    n_stages = len(dims_in)
    dims_state_causal = np.zeros(n_stages+1)
    dims_state_anticausal = np.zeros(n_stages+1)
    # Fractional number of states per interior boundary.
    dims_state_causal[1:n_stages] = [
        np.sum(np.minimum(sigmas_causal[j]/eps, 1)) for j in range(n_stages-1)
    ]
    dims_state_anticausal[1:n_stages] = [
        np.sum(np.minimum(sigmas_anticausal[j]/eps, 1)) for j in range(n_stages-1)
    ]

    cost_causal = math.cost(dims_in, dims_out, dims_state_causal, causal=True)
    cost_anticausal = math.cost(dims_in, dims_out, dims_state_anticausal,
                                causal=False, include_D=False)
    return cost_causal + cost_anticausal

### AlexNet

In [None]:
# Get the matrix to work on from the AlexNet model wrapper.

# NOTE(review): the helper names suggest the model is restricted to the
# classes listed in the animal-classes file — confirm in structurednets.
model_class = AlexNet
label_filepath = get_animal_classes_filepath()
required_indices = get_required_indices(label_filepath)
model = model_class(required_indices)
# Detach the result from autograd and convert it to a NumPy array.
output_mat = model.get_optimization_matrix().detach().numpy()

In [None]:
stages = 10

# Uniform input segmentation: boundaries are the rounded multiples of
# d_in/stages, the dims are the gaps between consecutive boundaries.
d_in = output_mat.shape[1]
boundaries = np.round(d_in/stages*np.arange(stages+1)).astype(int)
dims_in = np.diff(boundaries)

# Same construction for the outputs.
d_out = output_mat.shape[0]
boundaries = np.round(d_out/stages*np.arange(stages+1)).astype(int)
dims_out = np.diff(boundaries)

# The segments must cover the matrix exactly.
assert dims_in.sum() == d_in and dims_out.sum() == d_out

T = ToeplitzOperator(output_mat, dims_in, dims_out)
S = SystemIdentificationSVD(T, epsilon=1e-16)

system = MixedSystem(S)
#approx =Approximation(system)
print(system)

In [None]:
# eps is half the largest singular value of the lower-left block obtained by
# splitting the matrix in the middle of its rows and columns.
n_rows, n_cols = output_mat.shape
lower_left = output_mat[n_rows//2:, :n_cols//2]
eps_max = np.linalg.svd(lower_left, compute_uv=False)[0]
eps = 0.5*eps_max
print("eps:",eps)

# Move the boundaries using the global cost function.
sys_move, input_dims, output_dims = move.move(
    system, 2, cost_computation, m_in=[2, 1], m_out=[50, 25], cost_global=True
)


#sys_move,input_dims,output_dims = move.move(system,1,cost_error_cut_abs,m_in=1,m_out=15)
#utils.show_system(sys_move)


In [None]:
# Plot the cumulative input dims (boundary positions) over the columns of
# input_dims. NOTE(review): columns presumably correspond to the iterations
# of move.move — confirm there.
x_end = input_dims.shape[1]-1
# Outer edges: the total of the first column, and zero.
plt.hlines([np.sum(input_dims[:,0]),0],0,x_end)
for n_rows_below in range(1, input_dims.shape[0]):
    # Reference line at the boundary position in the first column ...
    plt.hlines(np.sum(input_dims[:n_rows_below,0]),0,x_end)
    # ... and the boundary position in every column.
    plt.plot(np.sum(input_dims[:n_rows_below,:],axis=0),"+-")
    

In [None]:
# Plot the cumulative output dims (boundary positions) over the columns of
# output_dims. NOTE(review): columns presumably correspond to the iterations
# of move.move — confirm there.
# The x-extent of every reference line is taken from output_dims itself; the
# inner lines previously used input_dims.shape[1] by mistake.
x_end = output_dims.shape[1]-1
# Outer edges: the total of the first column, and zero.
plt.hlines([np.sum(output_dims[:,0]),0],0,x_end)
for i in range(output_dims.shape[0]-1):
    # Reference line at the boundary position in the first column ...
    plt.hlines(np.sum(output_dims[:i+1,0]),0,x_end)
    # ... and the boundary position in every column.
    plt.plot(np.sum(output_dims[:i+1,:],axis=0),"+-")

In [None]:
# Print the moved system; the graphical utils.show_system view is left disabled.
#utils.show_system(sys_move)
print(sys_move)

In [None]:
n_plot_cols = len(dims_in)-1
fig, ax = plt.subplots(2, n_plot_cols, figsize=(20,5))

# Singular values of the weight matrix for the segmentation dims_in / dims_out.
sigmas_causal, sigmas_anticausal = math.extract_sigmas(output_mat, dims_in, dims_out)
ax = plot_sigmas(sigmas_causal, sigmas_anticausal, axes=ax)

# Number of singular values above eps, over all causal and anticausal sets.
stages_start = sum(np.count_nonzero(sig > eps) for sig in sigmas_causal) \
    + sum(np.count_nonzero(sig > eps) for sig in sigmas_anticausal)

# Same for the moved system, drawn onto the same axes.
sigmas_causal, sigmas_anticausal = math.extract_sigmas(
    sys_move.to_matrix(), sys_move.dims_in, sys_move.dims_out
)
ax = plot_sigmas(sigmas_causal, sigmas_anticausal, axes=ax)

stages_moved = sum(np.count_nonzero(sig > eps) for sig in sigmas_causal) \
    + sum(np.count_nonzero(sig > eps) for sig in sigmas_anticausal)

# Mark the threshold eps in every subplot.
for col in range(n_plot_cols):
    for row in (0, 1):
        ax[row, col].vlines(eps, 0, 100)

print("stages start: ",stages_start)
print("stages moved: ",stages_moved)

In [None]:
# NOTE(review): presumably a consistency check of the moved system's
# dimensions — confirm what check_dims verifies in tvsclib.utils.
utils.check_dims(sys_move)

In [None]:
# Approximate the original and the moved system with the same eps.
approx_orig = Approximation(system)
approx_sys_orig = approx_orig.get_approxiamtion(eps)

approx_move = Approximation(sys_move)
approx_sys_move = approx_move.get_approxiamtion(eps)

# Norm of the difference between the weight matrix and each approximation.
for label, approx_sys in (("Error regular:", approx_sys_orig),
                          ("Error moved:", approx_sys_move)):
    print(label, np.linalg.norm(output_mat - approx_sys.to_matrix()))

In [None]:
# Number of entries of the dense matrix, followed by the cost reported by
# each approximated system.
print("Cost matrix:", output_mat.size)
for label, approx_sys in (("Cost regular:", approx_sys_orig),
                          ("Cost moved:", approx_sys_move)):
    print(label, approx_sys.cost())

### Mobilenet


Here we also try how the system behaves if one cuts the singular values before moving the bounds.

In [None]:
def get_mobilenet_target_mats():
    """Return the weight matrices of the linear layers in MobileNetV2's classifier.

    Loads the pretrained ``mobilenet_v2`` model, puts it into eval mode and
    collects, in order, the detached weights of every ``torch.nn.Linear``
    layer in ``model.classifier`` as NumPy arrays.

    Returns:
        A list of NumPy arrays, one per linear layer.
    """
    # Load the pretrained model and switch it to eval mode.
    net = models.mobilenet_v2(pretrained=True)
    net.eval()
    return [
        layer.weight.detach().numpy()
        for layer in net.classifier
        if isinstance(layer, torch.nn.Linear)
    ]


In [None]:
# Use the weight matrix of the first linear layer found in the classifier.
mat_mobilenet = get_mobilenet_target_mats()[0]

In [None]:
stages = 15

# Uniform input segmentation: boundaries are the rounded multiples of
# d_in/stages, the dims are the gaps between consecutive boundaries.
d_in = mat_mobilenet.shape[1]
boundaries = np.round(d_in/stages*np.arange(stages+1)).astype(int)
dims_in = np.diff(boundaries)

# Same construction for the outputs.
d_out = mat_mobilenet.shape[0]
boundaries = np.round(d_out/stages*np.arange(stages+1)).astype(int)
dims_out = np.diff(boundaries)

# The segments must cover the matrix exactly.
assert dims_in.sum() == d_in and dims_out.sum() == d_out

# Identify with a comparatively large epsilon.
T = ToeplitzOperator(mat_mobilenet, dims_in, dims_out)
S = SystemIdentificationSVD(T, epsilon=2e-1)
system = MixedSystem(S)
#approx =Approximation(system)
print(system)

Here we have cut the singular values. We can also see that this leaves only relatively few state dims.


In [None]:

# eps is 0.35 times the largest singular value of the lower-left block
# obtained by splitting the matrix in the middle of its rows and columns.
n_rows, n_cols = mat_mobilenet.shape
lower_left = mat_mobilenet[n_rows//2:, :n_cols//2]
eps_max = np.linalg.svd(lower_left, compute_uv=False)[0]
eps = 0.35*eps_max
print("eps:",eps)

# Move the boundaries using the global cost function.
sys_move, input_dims, output_dims = move.move(
    system, 3, cost_computation, m_in=[24, 12, 6], m_out=[24, 12, 6],
    cost_global=True
)


#sys_move,input_dims,output_dims = move.move(system,1,cost_error_cut_abs,m_in=1,m_out=15)
#utils.show_system(sys_move)


In [None]:
# Plot the cumulative input dims (boundary positions) over the columns of
# input_dims. NOTE(review): columns presumably correspond to the iterations
# of move.move — confirm there.
x_end = input_dims.shape[1]-1
# Outer edges: the total of the first column, and zero.
plt.hlines([np.sum(input_dims[:,0]),0],0,x_end)
for n_rows_below in range(1, input_dims.shape[0]):
    # Reference line at the boundary position in the first column ...
    plt.hlines(np.sum(input_dims[:n_rows_below,0]),0,x_end)
    # ... and the boundary position in every column.
    plt.plot(np.sum(input_dims[:n_rows_below,:],axis=0),"+-")
    

In [None]:
# Plot the cumulative output dims (boundary positions) over the columns of
# output_dims. NOTE(review): columns presumably correspond to the iterations
# of move.move — confirm there.
# The x-extent of every reference line is taken from output_dims itself; the
# inner lines previously used input_dims.shape[1] by mistake.
x_end = output_dims.shape[1]-1
# Outer edges: the total of the first column, and zero.
plt.hlines([np.sum(output_dims[:,0]),0],0,x_end)
for i in range(output_dims.shape[0]-1):
    # Reference line at the boundary position in the first column ...
    plt.hlines(np.sum(output_dims[:i+1,0]),0,x_end)
    # ... and the boundary position in every column.
    plt.plot(np.sum(output_dims[:i+1,:],axis=0),"+-")

In [None]:
# Print the moved MobileNet system.
print(sys_move)

In [None]:
n_plot_cols = len(dims_in)-1
fig, ax = plt.subplots(2, n_plot_cols, figsize=(20,5))

# Singular values of the full weight matrix for the segmentation dims_in / dims_out.
sigmas_causal, sigmas_anticausal = math.extract_sigmas(mat_mobilenet, dims_in, dims_out)
ax = plot_sigmas(sigmas_causal, sigmas_anticausal, axes=ax)

# Number of singular values above eps, over all causal and anticausal sets.
stages_start = sum(np.count_nonzero(sig > eps) for sig in sigmas_causal) \
    + sum(np.count_nonzero(sig > eps) for sig in sigmas_anticausal)

# Same matrix, but segmented with the moved dims; drawn onto the same axes.
sigmas_causal, sigmas_anticausal = math.extract_sigmas(
    mat_mobilenet, sys_move.dims_in, sys_move.dims_out
)
ax = plot_sigmas(sigmas_causal, sigmas_anticausal, axes=ax)

stages_moved = sum(np.count_nonzero(sig > eps) for sig in sigmas_causal) \
    + sum(np.count_nonzero(sig > eps) for sig in sigmas_anticausal)

# Mark the threshold eps in every subplot.
for col in range(n_plot_cols):
    for row in (0, 1):
        ax[row, col].vlines(eps, 0, 600)

print("stages start: ",stages_start)
print("stages moved: ",stages_moved)

In [None]:
n_plot_cols = len(dims_in)-1
fig, ax = plt.subplots(2, n_plot_cols, figsize=(20,5))

# First the singular values currently held in sigmas_causal /
# sigmas_anticausal, i.e. whatever an earlier cell computed ...
ax = plot_sigmas(sigmas_causal, sigmas_anticausal, axes=ax)

# ... then those of the matrix represented by the moved system itself.
sigmas_causal, sigmas_anticausal = math.extract_sigmas(
    sys_move.to_matrix(), sys_move.dims_in, sys_move.dims_out
)
ax = plot_sigmas(sigmas_causal, sigmas_anticausal, axes=ax)

# Mark the threshold eps in every subplot.
for col in range(n_plot_cols):
    for row in (0, 1):
        ax[row, col].vlines(eps, 0, 600)

In [None]:
# Approximate the original and the moved system with the same eps.
approx_orig = Approximation(system)
approx_sys_orig = approx_orig.get_approxiamtion(eps)

approx_move = Approximation(sys_move)
approx_sys_move = approx_move.get_approxiamtion(eps)

# Norm of the difference between the weight matrix and each approximation.
for label, approx_sys in (("Error regular:", approx_sys_orig),
                          ("Error moved:", approx_sys_move)):
    print(label, np.linalg.norm(mat_mobilenet - approx_sys.to_matrix()))

In [None]:
# Number of entries of the dense matrix, followed by the cost reported by
# each approximated system.
print("Cost matrix:", mat_mobilenet.size)
for label, approx_sys in (("Cost regular:", approx_sys_orig),
                          ("Cost moved:", approx_sys_move)):
    print(label, approx_sys.cost())

In [None]:
# Display the moved MobileNet system with the tvsclib helper.
utils.show_system(sys_move)