# Linear Transformation 
We will start with the alignment of the same models trained with different seeds. 
- First, we do not restrict the matrix.
- Second, we restrict the matrix to be a rotation matrix.
- Third, we use an affine transformation.
- Last but not least, we use different norms and regularization techniques to improve the results.



Steps: 
- Load the same model but with different seed
- Sample different images and get latent representation 
- Create the data matrices X and X'
- Solve the simple optimization problem (using the closed-form solution as well as the cvxpy solver)


In [1]:
# Import relevant libraries
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms
from sklearn.decomposition import PCA
import cvxpy as cp

In [2]:
# Re-import modules with autoreload
%load_ext autoreload
%autoreload 2


In [35]:
# Experiment configuration
#
# Every value is stored as a string: `num_samples` is converted with int()
# where a number is needed, and the model names, seeds and sample count are
# concatenated into the result file names further down.
# NOTE(review): 'seed2' is '1' although 'path2' points at MNIST_VAE_4_10.pth —
# confirm which seed label is intended before relying on the file names.
config = dict(
    path1="/Users/federicoferoggio/Documents/vs_code/latent-communication/models/checkpoints/VAE/MNIST/MNIST_VAE_1_10.pth",
    modelname1='VAE',
    seed1='1',
    path2="/Users/federicoferoggio/Documents/vs_code/latent-communication/models/checkpoints/VAE/MNIST/MNIST_VAE_4_10.pth",
    modelname2='VAE',
    seed2='1',
    num_samples='1000',
    storage_path='VAE-ResNet-LinearTransform',
)

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")





# Optimization Problem in the Linear Case 
Let $x^i, y^i \in \mathbb{R}^n$ for $i = 1,\dots,m$ and $A \in \mathbb{R}^{n \times n}$. We are looking for the optimal $A$, which solves the following optimization problem 
$$ \min_A \sum_{i = 1}^m \|Ax^i - y^i\|^2 $$
which can be written component-wise as
$$ \min_A \sum_{i=1}^m \sum_{j=1}^n (A_{(j)} x^i - y^i_j)^2 $$
where $A_{(j)}$ denotes the $j$-th row of $A$ and we use the Euclidean norm unless otherwise stated.



## Load Model and Transformed Data for VAE
In this section we load the trained models, which we prepared for our experimental setup.

In [36]:
import sys

sys.path.append('..')
from utils.dataloaders.DataLoaderMNIST_single import DataLoader_MNIST


def load_model(model_name, model_path, device=None):
    """Build the requested architecture and load its checkpoint weights.

    Args:
        model_name: Architecture key, either ``'VAE'`` or ``'resnet'``.
        model_path: Path of the checkpoint handed to ``torch.load``; its
            content is passed to ``load_state_dict``.
        device: Device (``torch.device`` or device string) the model is moved
            to and the checkpoint is mapped onto. Defaults to the CPU, which
            is what this function previously hard-coded.

    Returns:
        The instantiated model with the checkpoint weights loaded.

    Raises:
        ValueError: If ``model_name`` is not one of the known architectures.
    """
    # A local name is used on purpose so the notebook-level DEVICE constant
    # is not shadowed inside this function.
    target = torch.device('cpu') if device is None else torch.device(device)

    # Model definitions are imported lazily so only the requested
    # architecture's module has to be importable.
    if model_name == 'VAE':
        from models.definitions.vae import VAE
        model = VAE(in_dim=784, dims=[256, 128, 64, 32], distribution_dim=16).to(target)
    elif model_name == 'resnet':
        from models.definitions.resnet import ResNet
        model = ResNet().to(target)
    else:
        raise ValueError(f"Unknown model name: {model_name}")

    model.load_state_dict(torch.load(model_path, map_location=target))
    return model

def load_Models():
    """Load the two models named in ``config`` and return them as a pair.

    Returns:
        ``(model1, model2)``, built from the ``modelname1``/``path1`` and
        ``modelname2``/``path2`` entries respectively.
    """
    return (
        load_model(config['modelname1'], config['path1']),
        load_model(config['modelname2'], config['path2']),
    )

def get_transformations(model_name):
    """Return the list of torchvision transforms used for ``model_name``.

    Args:
        model_name: ``'VAE'`` or ``'resnet'``.

    Returns:
        A new list of transform objects. Both pipelines start with
        ``ToTensor``; the VAE pipeline then normalises with mean/std 0.5 and
        flattens the tensor, the resnet pipeline repeats the tensor three
        times along its first dimension.

    Raises:
        ValueError: If ``model_name`` is not a known architecture.
    """
    if model_name not in ('VAE', 'resnet'):
        raise ValueError(f"Unknown model name: {model_name}")

    pipeline = [transforms.ToTensor()]
    if model_name == 'VAE':
        pipeline.append(transforms.Normalize((0.5,), (0.5,)))
        pipeline.append(transforms.Lambda(lambda x: x.view(-1)))
    else:
        pipeline.append(transforms.Lambda(lambda x: x.repeat(3, 1, 1)))
    return pipeline

def transformations():
    """Return the transform pipelines for model 1 and model 2, in that order.

    Returns:
        A 2-tuple of lists, looked up via ``config['modelname1']`` and
        ``config['modelname2']``.
    """
    return tuple(
        get_transformations(config[key]) for key in ('modelname1', 'modelname2')
    )

# Load both models from the checkpoints named in `config`
model1, model2 = load_Models()

# Get the preprocessing pipeline of each model (used by the sampling cell below)
transformations1, transformations2 = transformations()

# Initialize the MNIST data loader with the first model's transformations.
# NOTE(review): 128 is presumably the batch size — confirm against DataLoader_MNIST.
data_loader_VAE_1 = DataLoader_MNIST(128, get_transformations(config['modelname1']), seed=10)
# Size of the training set; the sampling cell draws its indices from this range
len_dataset_train = len(data_loader_VAE_1.get_train_loader().dataset)
print(len_dataset_train)

60000


# Sample from Dataset

In [37]:
# Show the configured sample count (stored as a string in `config`)
print(int(config['num_samples']))

1000


In [38]:
# NOTE(review): class_sampler is imported but not used in the visible cells.
from utils.sampler import simple_sampler, class_sampler
# Draw `num_samples` distinct training-set indices at random.
# NOTE(review): no np.random seed is set in the visible cells, so the selection
# presumably differs between runs — confirm whether that is intended.
indices = np.random.permutation(len_dataset_train)[:int(config['num_samples'])]
# Both models are given the same indices, each with its own transformations
z1 = simple_sampler(indices, model1, transformations1, DEVICE)
z2 = simple_sampler(indices, model2, transformations2, DEVICE)
# Show the shapes of the two latent matrices
print(z1.shape, z2.shape)

(1000, 32) (1000, 32)


# Convex Optimization Solver
## Linear Transformation with regularization
We are using the cvxpy solver, which is an open-source solver for convex optimization problems.

In [39]:
import os

# Print the current working directory (the relative path below resolves against it)
print(os.getcwd())

# Linear transformation: fit with LinearFitting, which is given lamda=0.01
from optimizer import LinearFitting

linear_fitting = LinearFitting(z1, z2, lamda=0.01)
linear_fitting.solve_problem()
loss, A = linear_fitting.get_results()

# File name layout: Linear_<model1>_<seed1>_<model2>_<seed2>_<num_samples>
name = '_'.join([
    'Linear',
    config['modelname1'],
    config['seed1'],
    config['modelname2'],
    config['seed2'],
    config['num_samples'],
])
path = '/'.join(['results', config['storage_path'], name])

# Saving is currently disabled:
# linear_fitting.save_results(path)



/Users/federicoferoggio/Documents/vs_code/latent-communication/optimization
Solving the problem
Defining the problem




## Affine transformation
In this section we implement the affine transformation, i.e. we add an offset to the problem.

In [None]:
from optimizer import AffineFitting

# Fit the affine transformation; AffineFitting is given lamda=0.01
affine_fitting = AffineFitting(z1, z2, lamda=0.01)
affine_fitting.solve_problem()

# get_results returns the loss, the matrix A and the offset b
loss, A, b = affine_fitting.get_results()

# Show the results
print('Loss:', loss)
print('A:', A)
print('b:', b)

# File name layout: Affine_<model1>_<seed1>_<model2>_<seed2>_<num_samples>
name = '_'.join([
    'Affine',
    config['modelname1'],
    config['seed1'],
    config['modelname2'],
    config['seed2'],
    config['num_samples'],
])
# NOTE(review): this path starts with 'optimization/' while the linear cell
# uses 'results/' — confirm which output directory is intended.
path = '/'.join(['optimization', config['storage_path'], name])

# Save the results
affine_fitting.save_results(path)



## Linear Transformation with constraints (psd)
In this section we restrict the problem and require the matrix to be positive semidefinite.

In [None]:
# Optimization Variable
A_psd = cp.Variable((32, 32))

# Loss Function
loss_psd = cp.norm2(cp.vstack([A_psd @ z1[i] - z2[i] for i in range(z1.shape[0])]))**2 + lamda * cp.norm(A_psd, 'fro')**2

# Objective Function
objective_psd = cp.Minimize(loss_psd)

# Constraints
constraints = [A_psd >> 0]

# Problem

problem_psd = cp.Problem(objective_psd, constraints)

# Solve the problem

problem_psd.solve()

# Print results
print("Optimal value: ", problem_psd.value)
print(A_psd.value)



## Affine Transformation with constraint (psd)

In [None]:
# Regularisation weight. `lamda` is not assigned anywhere else in the notebook,
# so it is defined here with the value passed to LinearFitting/AffineFitting.
lamda = 0.01

# Optimization variables, sized from the data instead of a hard-coded 32.
# Assumes z1 and z2 are 2-D arrays of shape (m, n), as printed by the sampling cell.
n_latent = z1.shape[1]
A_aff_psd = cp.Variable((n_latent, n_latent))
b_aff_psd = cp.Variable(n_latent)

# Loss function: sum_i ||A x^i + b - y^i||_2^2 + lamda * ||A||_F^2.
# The offset is added per sample so no matrix/vector broadcasting is needed.
# cp.sum_squares is used because cp.norm2 of the stacked 2-D expression is the
# matrix 2-norm (largest singular value), not the sum of squared residuals.
residual_aff_psd = cp.vstack([A_aff_psd @ x + b_aff_psd - y for x, y in zip(z1, z2)])
loss_aff_psd = cp.sum_squares(residual_aff_psd) + lamda * cp.norm(A_aff_psd, 'fro')**2

# Objective function
objective_aff_psd = cp.Minimize(loss_aff_psd)

# Constraints
# NOTE(review): A_aff_psd is not declared symmetric; depending on the cvxpy
# version `>>` may expect a symmetric expression — confirm.
constraints_aff_psd = [A_aff_psd >> 0]

# Problem
problem_aff_psd = cp.Problem(objective_aff_psd, constraints_aff_psd)

# Solve the problem
problem_aff_psd.solve()

# Print results
print("Optimal value: ", problem_aff_psd.value)
print(A_aff_psd.value)
print(b_aff_psd.value)


## Trying different norms 
We begin by reformulating the problem with the L1 norm.



In [None]:
# Regularisation weight. `lamda` is not assigned anywhere else in the notebook,
# so it is defined here with the value passed to LinearFitting/AffineFitting.
lamda = 0.01

# Optimization variable, sized from the data instead of a hard-coded 32.
# Assumes z1 and z2 are 2-D arrays of shape (m, n), as printed by the sampling cell.
n_latent = z1.shape[1]
A_L1 = cp.Variable((n_latent, n_latent))

# May need another solver

# Loss function: squared entrywise L1 norm of all residuals plus a squared
# norm penalty on A_L1. The residuals are formed in one (n, m) expression;
# cp.norm1 sums absolute values over all entries, so the layout does not matter.
# NOTE(review): the regulariser originally passed '' as the norm type, which
# cvxpy does not accept; 'fro' is used to match the other cells — confirm
# that the Frobenius norm is the intended regulariser.
residual_L1 = A_L1 @ z1.T - z2.T
loss_L1 = cp.norm1(residual_L1)**2 + lamda * cp.norm(A_L1, 'fro')**2

# Objective function
objective_L1 = cp.Minimize(loss_L1)

# Problem
problem_L1 = cp.Problem(objective_L1)

# Solve the problem
# NOTE(review): ECOS must be installed separately in some cvxpy versions — confirm.
problem_L1.solve(verbose=True, solver=cp.ECOS)

# Print results
print("Optimal value: ", problem_L1.value)
print(A_L1.value)


# Compare the total distance 

In [None]:
# Get Latent Space for al