In [1]:
import os
import torch
import torchvision.transforms as transforms
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from utils.dataloaders.full_dataloaders import DataLoaderMNIST, DataLoaderCIFAR10, DataLoaderCIFAR100, DataLoaderFashionMNIST
from models.definitions.PCKTAE import PocketAutoencoder
import pandas as pd
from tqdm import tqdm
import itertools
import sys
import logging

logging.getLogger('torchvision.datasets').setLevel(logging.ERROR)
os.chdir('/Users/federicoferoggio/Documents/vs_code/latent-communication')


# Initialize DataFrame with additional columns
loss_dataset = pd.DataFrame(columns=['Dataset', 'Model','Latent Size', 'Seed', 'Loss'])

# Define the lists
datasets_list = [ 'MNIST' ,'FMNIST', 'CIFAR10', 'CIFAR100']
seeds = [1, 2, 3]
paths = ['models/checkpoints/SMALLAE/MNIST/', 'models/checkpoints/SMALLAE/FMNIST/', 'models/checkpoints/SMALLAE/CIFAR10/', 'models/checkpoints/SMALLAE/CIFAR100/']
dataloader_l = [DataLoaderMNIST, DataLoaderFashionMNIST, DataLoaderCIFAR10, DataLoaderCIFAR100]
channels_input = [1, 1, 3, 3]
size_input = [28, 28, 32, 32]  
epochs = [20]
batch_sizes = [128]
learning_rates = [0.005]
latent_sizes = [10, 30, 50, 100, 500, 1000]

# Create a list of tuples with dataset, corresponding path, dataloader, and input channels
dataset_info = list(zip(datasets_list, paths, dataloader_l, channels_input, size_input))

# Split dataset_info based on the dataset type
mnist_info = dataset_info[:2]
cifar_info = dataset_info[2:]

# Create combinations for MNIST and CIFAR datasets separately
combinations1 = [mnist_info, seeds, epochs, batch_sizes, learning_rates, latent_sizes[:3]]
combinations1 = list(itertools.product(*combinations1))

combinations2 = [cifar_info, seeds, epochs, batch_sizes, learning_rates, latent_sizes[3:]]
combinations2 = list(itertools.product(*combinations2))

# Combine both sets of combinations
combinations = combinations1 + combinations2

# Print the combinations and their count
print(combinations, len(combinations))

# Example of setting device and augmentations (adjust according to your actual use case)
DEVICE = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

augmentations_mnist = transforms.Compose([transforms.ToTensor()])
augmentations_cifar = transforms.Compose([transforms.ToTensor()])

for combo in tqdm(combinations):
    (dataset, path, dataloader, channels_input, size_input), seed, epoch, batch_size, learning_rate, latent_size = combo
    print(f"Dataset: {dataset},{dataloader.__name__}, Channels input: {channels_input}, Epochs: {epoch}, Batch size: {batch_size}, Learning rate: {learning_rate}, Latent size: {latent_size}")


[(('MNIST', 'models/checkpoints/SMALLAE/MNIST/', <class 'utils.dataloaders.full_dataloaders.DataLoaderMNIST'>, 1, 28), 1, 20, 128, 0.005, 10), (('MNIST', 'models/checkpoints/SMALLAE/MNIST/', <class 'utils.dataloaders.full_dataloaders.DataLoaderMNIST'>, 1, 28), 1, 20, 128, 0.005, 30), (('MNIST', 'models/checkpoints/SMALLAE/MNIST/', <class 'utils.dataloaders.full_dataloaders.DataLoaderMNIST'>, 1, 28), 1, 20, 128, 0.005, 50), (('MNIST', 'models/checkpoints/SMALLAE/MNIST/', <class 'utils.dataloaders.full_dataloaders.DataLoaderMNIST'>, 1, 28), 2, 20, 128, 0.005, 10), (('MNIST', 'models/checkpoints/SMALLAE/MNIST/', <class 'utils.dataloaders.full_dataloaders.DataLoaderMNIST'>, 1, 28), 2, 20, 128, 0.005, 30), (('MNIST', 'models/checkpoints/SMALLAE/MNIST/', <class 'utils.dataloaders.full_dataloaders.DataLoaderMNIST'>, 1, 28), 2, 20, 128, 0.005, 50), (('MNIST', 'models/checkpoints/SMALLAE/MNIST/', <class 'utils.dataloaders.full_dataloaders.DataLoaderMNIST'>, 1, 28), 3, 20, 128, 0.005, 10), (('MN

100%|██████████| 36/36 [00:00<00:00, 151906.38it/s]

Dataset: MNIST,DataLoaderMNIST, Channels input: 1, Epochs: 20, Batch size: 128, Learning rate: 0.005, Latent size: 10
Dataset: MNIST,DataLoaderMNIST, Channels input: 1, Epochs: 20, Batch size: 128, Learning rate: 0.005, Latent size: 30
Dataset: MNIST,DataLoaderMNIST, Channels input: 1, Epochs: 20, Batch size: 128, Learning rate: 0.005, Latent size: 50
Dataset: MNIST,DataLoaderMNIST, Channels input: 1, Epochs: 20, Batch size: 128, Learning rate: 0.005, Latent size: 10
Dataset: MNIST,DataLoaderMNIST, Channels input: 1, Epochs: 20, Batch size: 128, Learning rate: 0.005, Latent size: 30
Dataset: MNIST,DataLoaderMNIST, Channels input: 1, Epochs: 20, Batch size: 128, Learning rate: 0.005, Latent size: 50
Dataset: MNIST,DataLoaderMNIST, Channels input: 1, Epochs: 20, Batch size: 128, Learning rate: 0.005, Latent size: 10
Dataset: MNIST,DataLoaderMNIST, Channels input: 1, Epochs: 20, Batch size: 128, Learning rate: 0.005, Latent size: 30
Dataset: MNIST,DataLoaderMNIST, Channels input: 1, Epoch




In [2]:
iterations_ = tqdm(combinations)
for (dataset, path, dataloader, channels_input, size_input), seed, num_epochs, batch_size, learning_rate, latent_dim in iterations_:
    augmentations = [transforms.ToTensor()]
    dataloader = dataloader(batch_size=batch_size, transformation=augmentations, seed=seed, shuffle_train_flag = True)
    test_loader = dataloader.get_test_loader()
    train_loader = dataloader.get_train_loader()
    config = {
        'model_name': 'PCKTAE',
        'dataset': dataset,
        'weight_var': 1,
        'weight_mean': 0,
        'seed': seed,
        'batch_size': batch_size,
        'num_epochs': num_epochs,
        'learning_rate': learning_rate,
        'path': path
    }
    
    torch.manual_seed(config['seed'])
    model = PocketAutoencoder(hidden_dim=latent_dim, n_input_channels=channels_input, input_size=size_input)
    model.to(DEVICE)
    optimizer = Adam(model.parameters(), lr=config['learning_rate'], weight_decay=1e-4)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

    for epoch in range(config['num_epochs']):
        overall_loss = 0
        model.train()  # Set the model to training mode
        
        for batch_idx, (x, _) in enumerate(train_loader):
            x = x.to(DEVICE)
            optimizer.zero_grad()
            loss = model.training_step(x)
            overall_loss += loss.item()
            loss.backward()
            optimizer.step()
        
        avg_loss = overall_loss / (len(train_loader) * batch_size)
        new_row = pd.DataFrame({'Dataset': [config['dataset']],
                                'Model': [config['model_name']],
                                'Seed': [config['seed']],
                                'Latent Size': [latent_dim],
                                'Loss': [avg_loss]})
        loss_dataset = pd.concat([loss_dataset, new_row], ignore_index=True)
        iterations_.set_description(f"Epoch: {epoch}, Loss: {avg_loss}")
    
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Disable gradient calculation
        test_loss = 0
        for x_test, _ in test_loader:
            x_test = x_test.to(DEVICE)
            test_loss += model.validation_step(x_test).item()
        
        avg_test_loss = test_loss / (len(test_loader) * batch_size)
        scheduler.step(avg_test_loss)  # Update the learning rate based on the test loss
        new_row = pd.DataFrame({'Dataset': [config['dataset']],
                                'Model': [config['model_name']],
                                'Seed': [config['seed']],
                                'Latent Size': [latent_dim],
                                'Loss': [avg_test_loss]})
        loss_dataset = pd.concat([loss_dataset, new_row], ignore_index=True)
        iterations_.set_description(f"Test Loss: {avg_test_loss}")    
    
    # Save the model
    name = f"{config['dataset']}_{config['model_name']}_{latent_dim}_{config['seed']}.pth"
    print(name)
    path = config['path'] + name
    torch.save(model.state_dict(), path)

loss_dataset.to_csv('models/checkpoints/SMALLAE/lossesFMIST.csv', index=False)


  loss_dataset = pd.concat([loss_dataset, new_row], ignore_index=True)
Test Loss: 0.00020605821753380514:   3%|▎         | 1/36 [02:16<1:19:20, 136.01s/it]

MNIST_PCKTAE_10_1.pth


Test Loss: 0.00011217213964128066:   6%|▌         | 2/36 [04:36<1:18:27, 138.44s/it]      

MNIST_PCKTAE_30_1.pth


Test Loss: 9.17200231924653e-05:   8%|▊         | 3/36 [06:51<1:15:25, 137.13s/it]        

MNIST_PCKTAE_50_1.pth


Test Loss: 0.00020088920372691524:  11%|█         | 4/36 [09:03<1:12:05, 135.18s/it]      

MNIST_PCKTAE_10_2.pth


Test Loss: 0.00011288747366420523:  14%|█▍        | 5/36 [11:23<1:10:36, 136.65s/it]      

MNIST_PCKTAE_30_2.pth


Test Loss: 9.620922751712931e-05:  17%|█▋        | 6/36 [13:43<1:08:57, 137.90s/it]       

MNIST_PCKTAE_50_2.pth


Test Loss: 0.0002197606333964681:  19%|█▉        | 7/36 [16:03<1:06:56, 138.49s/it]       

MNIST_PCKTAE_10_3.pth


Test Loss: 0.00012516352709801277:  22%|██▏       | 8/36 [18:18<1:04:08, 137.44s/it]      

MNIST_PCKTAE_30_3.pth


Test Loss: 8.790682624772049e-05:  25%|██▌       | 9/36 [20:33<1:01:29, 136.65s/it]       

MNIST_PCKTAE_50_3.pth


Test Loss: 0.00020999202296238014:  28%|██▊       | 10/36 [22:46<58:41, 135.45s/it]       

FMNIST_PCKTAE_10_1.pth


Test Loss: 0.00016339955581764725:  31%|███       | 11/36 [24:59<56:11, 134.85s/it]      

FMNIST_PCKTAE_30_1.pth


Test Loss: 0.00015180893360248096:  33%|███▎      | 12/36 [27:13<53:51, 134.63s/it]      

FMNIST_PCKTAE_50_1.pth


Test Loss: 0.00020394296900375121:  36%|███▌      | 13/36 [29:26<51:26, 134.21s/it]      

FMNIST_PCKTAE_10_2.pth


Test Loss: 0.00020273912331837972:  39%|███▉      | 14/36 [31:41<49:13, 134.26s/it]      

FMNIST_PCKTAE_30_2.pth


Test Loss: 0.0001767902101590475:  42%|████▏     | 15/36 [33:55<46:59, 134.26s/it]       

FMNIST_PCKTAE_50_2.pth


Test Loss: 0.00020252968644416785:  44%|████▍     | 16/36 [36:08<44:39, 134.00s/it]      

FMNIST_PCKTAE_10_3.pth


Test Loss: 0.00018998811756779405:  47%|████▋     | 17/36 [38:23<42:27, 134.06s/it]      

FMNIST_PCKTAE_30_3.pth


Test Loss: 0.00014130567967496766:  50%|█████     | 18/36 [40:37<40:14, 134.16s/it]      

FMNIST_PCKTAE_50_3.pth


Test Loss: 0.00016098348880658233:  53%|█████▎    | 19/36 [43:14<39:56, 140.97s/it]     

CIFAR10_PCKTAE_100_1.pth


Test Loss: 0.00019003859981953439:  56%|█████▌    | 20/36 [45:55<39:12, 147.02s/it]     

CIFAR10_PCKTAE_500_1.pth


Test Loss: 0.00021873708778279068:  58%|█████▊    | 21/36 [48:52<39:02, 156.15s/it]     

CIFAR10_PCKTAE_1000_1.pth


Test Loss: 0.00016339024388131132:  61%|██████    | 22/36 [51:33<36:45, 157.55s/it]     

CIFAR10_PCKTAE_100_2.pth


Test Loss: 0.00021887839866855147:  64%|██████▍   | 23/36 [54:21<34:48, 160.66s/it]     

CIFAR10_PCKTAE_500_2.pth


Test Loss: 0.00034671796734822043:  67%|██████▋   | 24/36 [57:23<33:24, 167.08s/it]     

CIFAR10_PCKTAE_1000_2.pth


Test Loss: 0.0001702678116860717:  69%|██████▉   | 25/36 [1:00:06<30:24, 165.89s/it]      

CIFAR10_PCKTAE_100_3.pth


Test Loss: 0.00036423591730373473:  72%|███████▏  | 26/36 [1:02:58<27:56, 167.69s/it]     

CIFAR10_PCKTAE_500_3.pth


Test Loss: 0.00015172825391622426:  75%|███████▌  | 27/36 [1:06:10<26:14, 174.97s/it]     

CIFAR10_PCKTAE_1000_3.pth


Test Loss: 0.00016156063692149104:  78%|███████▊  | 28/36 [1:08:56<22:57, 172.17s/it]     

CIFAR100_PCKTAE_100_1.pth


Test Loss: 0.0002996987450272552:  81%|████████  | 29/36 [1:11:49<20:08, 172.61s/it]      

CIFAR100_PCKTAE_500_1.pth


Test Loss: 0.00043580564301861805:  83%|████████▎ | 30/36 [1:15:06<17:58, 179.82s/it]     

CIFAR100_PCKTAE_1000_1.pth


Test Loss: 0.0001483652585987709:  86%|████████▌ | 31/36 [1:17:52<14:37, 175.59s/it]      

CIFAR100_PCKTAE_100_2.pth


Test Loss: 0.00019259809732254405:  89%|████████▉ | 32/36 [1:20:46<11:40, 175.05s/it]     

CIFAR100_PCKTAE_500_2.pth


Test Loss: 0.00019620391916000296:  92%|█████████▏| 33/36 [1:24:03<09:05, 181.86s/it]     

CIFAR100_PCKTAE_1000_2.pth


Test Loss: 0.0001611016564368848:  94%|█████████▍| 34/36 [1:26:49<05:54, 177.00s/it]      

CIFAR100_PCKTAE_100_3.pth


Test Loss: 0.00021326949372681305:  97%|█████████▋| 35/36 [1:29:43<02:56, 176.11s/it]     

CIFAR100_PCKTAE_500_3.pth


Test Loss: 0.000324873584528937: 100%|██████████| 36/36 [1:33:00<00:00, 155.01s/it]       

CIFAR100_PCKTAE_1000_3.pth





In [3]:
from pathlib import Path
import os
import itertools

os.chdir('/Users/federicoferoggio/Documents/vs_code/latent-communication')

import torch
import numpy as np
from tqdm import tqdm
from torchvision.datasets import MNIST
from torchvision.transforms import transforms
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr

from models.definitions.PCKTAE import PocketAutoencoder
from utils.dataloaders.full_dataloaders import DataLoaderMNIST, DataLoaderFashionMNIST, DataLoaderCIFAR10, DataLoaderCIFAR100
from utils.visualization import (
    visualize_mapping_error,
    visualize_latent_space_pca,
    plot_latent_space,
    highlight_cluster,
)
from utils.sampler import *
from optimization.fit_mapping import create_mapping
from utils.metrics import calculate_MSE_ssim_psnr
from utils.model import load_model, get_transformations

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Config:
    def __init__(self, **entries):
        self.__dict__.update(entries)

In [4]:
# Function to clear GPU memory
def clear_memory():
    torch.cuda.empty_cache()

def define_dataloader(file, file2, use_test_set=False):
    if file.strip("_")[0] != file2.strip("_")[0]:
        logging.error("The datasets are different")
    # Define the dataloaders
    name_dataset, name_model, size_of_the_latent, seed = file.strip(".pth").split("_")
    augumentation = get_transformations(name_model)
    if name_dataset.lower() == "mnist":
        dataloader = DataLoaderMNIST(transformation=augumentation, batch_size=64, seed=int(seed))
    if name_dataset.lower() == "fmnist":
        dataloader = DataLoaderFashionMNIST(transformation=augumentation,batch_size=64, seed=int(seed))
    if name_dataset.lower() == "cifar10":
        dataloader = DataLoaderCIFAR10(transformation=augumentation,batch_size=64, seed=int(seed))
    if name_dataset.lower() == "cifar100":
        dataloader = DataLoaderCIFAR100(transformation=augumentation,batch_size=64, seed=int(seed))
    if use_test_set:
        full_dataset_images, full_dataset_labels = dataloader.get_full_test_dataset()
    else:
        full_dataset_images, full_dataset_labels = dataloader.get_full_train_dataset()
    return full_dataset_images, full_dataset_labels, len(np.unique(full_dataset_labels.numpy()))


def calculate_and_save_mapping(model1, model2, sampling_strategy, sampled_images, parameters, file1, file2, transformations_database, num_samples, lamda, DEVICE):

    name_dataset1, name_model1, size_of_the_latent1, seed1 = file1.strip(".pth").split("_")
    name_dataset2, name_model2, size_of_the_latent2, seed2 = file2.strip(".pth").split("_")

    # Set the model to evaluation and sends them to the DEVICE 
    model1.to(torch.float32).to(DEVICE).eval()
    model2.to(torch.float32).to(DEVICE).eval()
    # Get latent of the sampled images
    latent_left_sampled_equally = model1.get_latent_space(sampled_images)
    latent_right_sampled_equally = model2.get_latent_space(sampled_images)
    latent_left_sampled_equally = latent_left_sampled_equally.to(torch.float32).cpu().detach().numpy()
    latent_right_sampled_equally = latent_right_sampled_equally.to(torch.float32).cpu().detach().numpy()
    # Create mapping and visualize
    cfg = Config(**parameters)
    mapping = create_mapping(cfg, latent_left_sampled_equally, latent_right_sampled_equally, do_print=False)
    mapping.fit()
    storage_path = f'results/transformations/mapping_files/{name_model2}/'
    Path(storage_path).mkdir(parents=True, exist_ok=True)
    filename = f"{file1.strip('.pth')}>{file2.strip('.pth')}>{cfg.mapping}_{num_samples}_{lamda}_{sampling_strategy}"
    mapping.save_results(storage_path + filename)
    transformations_database = pd.concat([transformations_database, pd.DataFrame({"model1": [file1], "model2": [file2], "mapping": [storage_path]})], ignore_index=True)
    return transformations_database

In [6]:
try:
    df_save_mappings = pd.read_csv("results/transformations/mapping_files/transfomations_index.csv")
except:
    df_save_mappings = pd.DataFrame(columns=["model1", "model2", "mapping"])

## Here is the part that you have to modify however you want
## Define directories where you want to ieratively create the mapping, and then write down the parameters you want to use
os.makedirs("../results/transformations/mapping_files/", exist_ok=True)

folder1 = "models/checkpoints/SMALLAE/FMNIST/"
folder2 = "models/checkpoints/SMALLAE/FMNIST/"
number_samples = [10,50,100,200,300]
mapping_list = ["Linear", "Affine"]
lamda_list = [0,0.01, 0.001]
use_test_set = False
filter1 = '_' #write here if you want that the processed files contain this string (example "_50_" to only process the files with latent size 50)
filter2 = '_' #write here if you want that the processed files contain this string (example "_50_" to only process the files with latent size 50)


## this autiomatically creates all teh possible setups with the paramenters and the files you speicified, and sets up the correct dataset
files1 = [f for f in os.listdir(folder1) if f.endswith(".pth") and filter1 in f]
files2 = [f for f in os.listdir(folder2) if f.endswith(".pth") and filter2 in f]
list_of_files = [(f1, f2) for f1, f2 in itertools.product(files1, files2) if f1 != f2]
combinations_parameters = list(itertools.product(number_samples, mapping_list, lamda_list))
pbar = tqdm(list(itertools.product(list_of_files, combinations_parameters)))
images, labels, n_classes = define_dataloader(files1[0], files2[0], use_test_set)
images = images.type(torch.float32)
labels = labels.type(torch.float32)

# Loop through combinations
for (file1, file2), (num_samples, mapping, lamda) in pbar:
    parameters = {"num_samples": num_samples, "mapping": mapping, "lamda": lamda} #This is done to go around some hydra stuff (<3 kai)
    name_dataset1, name_model1, size_of_the_latent1, seed1 = file1.strip(".pth").split("_")
    name_dataset2, name_model2, size_of_the_latent2, seed2 = file2.strip(".pth").split("_")

    model1 = load_model(model_name=name_model1, name_dataset=name_dataset1, latent_size=int(size_of_the_latent1), seed=int(seed1), model_path = folder1 + file1)
    model2 = load_model(model_name=name_model2, name_dataset=name_dataset2, latent_size=int(size_of_the_latent2), seed=int(seed2), model_path = folder1 + file2)
    pbar.set_description("Sampling equally per class")
    images_sampled_equally, labels_sampled_equally = sample_equally_per_class_images(num_samples, images, labels)
    pbar.set_description("Sampling removing outliers")
    images_sampled_max_distance, labels_sampled_drop_outliers = sample_removing_outliers(num_samples, images, labels, model2)
    pbar.set_description("Sampling worst classes")
    images_sampled_worst_classes, labels_sampled_worst_classes = sample_with_half_worst_classes_images(num_samples, images, labels, model2)
    pbar.set_description("Sampling convex hull")
    images_sampled_best_classes, labels_sampled_convex_hull = sample_convex_hulls_images(num_samples, images, labels, model1)
    pbar.set_description("Processing %s and %s" % (file1, file2))
    df_save_mappings = calculate_and_save_mapping(model1, model2, "equally", images_sampled_equally, parameters, file1, file2, df_save_mappings, num_samples, lamda, DEVICE)
    df_save_mappings = calculate_and_save_mapping(model1, model2, "outliers", images_sampled_max_distance, parameters, file1, file2, df_save_mappings, num_samples, lamda, DEVICE)
    df_save_mappings = calculate_and_save_mapping(model1, model2, "worst_classes", images_sampled_worst_classes, parameters, file1, file2, df_save_mappings, num_samples, lamda, DEVICE)
    df_save_mappings = calculate_and_save_mapping(model1, model2, "convex_hull", images_sampled_best_classes, parameters, file1, file2, df_save_mappings, num_samples, lamda, DEVICE)
    pbar.set_description("Processed %s and %s" % (file1, file2))
df_save_mappings.to_csv("../results/transformations/mapping_files/transfomations_index.csv", index=False)


Processing FMNIST_PCKTAE_50_1.pth and FMNIST_PCKTAE_30_1.pth:   7%|▋         | 144/2160 [3:07:32<43:45:30, 78.14s/it] 

Failure:interrupted





SolverError: Solver 'SCS' failed. Try another solver, or solve with verbose=True for more information.

In [None]:
try:
    df_save_mappings = pd.read_csv("results/transformations/mapping_files/transfomations_index.csv")
except:
    df_save_mappings = pd.DataFrame(columns=["model1", "model2", "mapping"])

## Here is the part that you have to modify however you want
## Define directories where you want to ieratively create the mapping, and then write down the parameters you want to use
os.makedirs("../results/transformations/mapping_files/", exist_ok=True)

folder1 = "models/checkpoints/SMALLAE/MNIST/"
folder2 = "models/checkpoints/SMALLAE/MNIST/"
number_samples = [10,50,100,200,300]
mapping_list = ["Linear", "Affine"]
lamda_list = [0,0.01, 0.001]
use_test_set = False
filter1 = '_' #write here if you want that the processed files contain this string (example "_50_" to only process the files with latent size 50)
filter2 = '_' #write here if you want that the processed files contain this string (example "_50_" to only process the files with latent size 50)


## this autiomatically creates all teh possible setups with the paramenters and the files you speicified, and sets up the correct dataset
files1 = [f for f in os.listdir(folder1) if f.endswith(".pth") and filter1 in f]
files2 = [f for f in os.listdir(folder2) if f.endswith(".pth") and filter2 in f]
list_of_files = [(f1, f2) for f1, f2 in itertools.product(files1, files2) if f1 != f2]
combinations_parameters = list(itertools.product(number_samples, mapping_list, lamda_list))
pbar = tqdm(list(itertools.product(list_of_files, combinations_parameters)))
images, labels, n_classes = define_dataloader(files1[0], files2[0], use_test_set)
images = images.type(torch.float32)
labels = labels.type(torch.float32)

# Loop through combinations
for (file1, file2), (num_samples, mapping, lamda) in pbar:
    parameters = {"num_samples": num_samples, "mapping": mapping, "lamda": lamda} #This is done to go around some hydra stuff (<3 kai)
    name_dataset1, name_model1, size_of_the_latent1, seed1 = file1.strip(".pth").split("_")
    name_dataset2, name_model2, size_of_the_latent2, seed2 = file2.strip(".pth").split("_")

    model1 = load_model(model_name=name_model1, name_dataset=name_dataset1, latent_size=int(size_of_the_latent1), seed=int(seed1), model_path = folder1 + file1)
    model2 = load_model(model_name=name_model2, name_dataset=name_dataset2, latent_size=int(size_of_the_latent2), seed=int(seed2), model_path = folder1 + file2)
    pbar.set_description("Sampling equally per class")
    images_sampled_equally, labels_sampled_equally = sample_equally_per_class_images(num_samples, images, labels)
    pbar.set_description("Sampling removing outliers")
    images_sampled_max_distance, labels_sampled_drop_outliers = sample_removing_outliers(num_samples, images, labels, model2)
    pbar.set_description("Sampling worst classes")
    images_sampled_worst_classes, labels_sampled_worst_classes = sample_with_half_worst_classes_images(num_samples, images, labels, model2)
    pbar.set_description("Sampling convex hull")
    images_sampled_best_classes, labels_sampled_convex_hull = sample_convex_hulls_images(num_samples, images, labels, model1)
    pbar.set_description("Processing %s and %s" % (file1, file2))
    df_save_mappings = calculate_and_save_mapping(model1, model2, "equally", images_sampled_equally, parameters, file1, file2, df_save_mappings, num_samples, lamda, DEVICE)
    df_save_mappings = calculate_and_save_mapping(model1, model2, "outliers", images_sampled_max_distance, parameters, file1, file2, df_save_mappings, num_samples, lamda, DEVICE)
    df_save_mappings = calculate_and_save_mapping(model1, model2, "worst_classes", images_sampled_worst_classes, parameters, file1, file2, df_save_mappings, num_samples, lamda, DEVICE)
    df_save_mappings = calculate_and_save_mapping(model1, model2, "convex_hull", images_sampled_best_classes, parameters, file1, file2, df_save_mappings, num_samples, lamda, DEVICE)
    pbar.set_description("Processed %s and %s" % (file1, file2))
df_save_mappings.to_csv("../results/transformations/mapping_files/transfomations_index.csv", index=False)