In [7]:
import os
import torch
import torchvision.transforms as transforms
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from utils.dataloaders.dataloader_mnist_single import DataLoaderMNIST
from models.definitions.PocketAutoencoder import PocketAutoencoder
import pandas as pd
from tqdm import tqdm
import itertools

os.chdir('/Users/federicoferoggio/Documents/vs_code/latent-communication')


# Initialize DataFrame with additional columns
loss_dataset = pd.DataFrame(columns=['Dataset', 'Model', 'Seed', 'Epochs', 'Learning Rate', 'Batch Size', 'Loss'])

datasets_list = ['MNIST']
seeds = [1, 2, 3, 4]
paths = ['models/checkpoints/SMALLAE/MNIST/']
dataloader_l = [DataLoaderMNIST]
epochs = [5, 20]
batch_sizes = [64, 128]
learning_rates = [0.01, 0.001]

combinations1 = [datasets_list, seeds, paths, dataloader_l, epochs, batch_sizes, learning_rates]
combinations1 = list(itertools.product(*combinations1))

datasets_list = ['MNIST']
seeds = [3, 4]
paths = ['models/checkpoints/SMALLAE/MNIST/']
dataloader_l = [DataLoaderMNIST]
epochs = [1, 10]
batch_sizes = [32, 64, 128]
learning_rates = [0.01, 0.001, 0.0001]

combinations2 = [datasets_list, seeds, paths, dataloader_l, epochs, batch_sizes, learning_rates]
combinations2 = list(itertools.product(*combinations2))

combinations = combinations1 + combinations2

DEVICE = torch.device("mps")
augmentations = [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
print(len(combinations))

68


In [None]:
for dataset, seed, paths, dataloader, epochs, batch_size, learning_rate in tqdm(combinations):
    dataloader = dataloader(batch_size=batch_size, transformation=augmentations, seed=seed)
    test_loader = dataloader.get_test_loader()
    train_loader = dataloader.get_train_loader()
    config = {
        'model_name': 'SMLLAE',
        'dataset': dataset,
        'weight_var': 1,
        'weight_mean': 0,
        'seed': seed,
        'batch_size': batch_size,
        'num_epochs': epochs,
        'learning_rate': learning_rate,
        'path': paths
    }
    
    torch.manual_seed(config['seed'])
    model = PocketAutoencoder()
    model.to(DEVICE)
    optimizer = Adam(model.parameters(), lr=config['learning_rate'], weight_decay=1e-4)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True)
    
    for epoch in range(config['num_epochs']):
        overall_loss = 0
        model.train()  # Set the model to training mode
        
        for batch_idx, (x, _) in enumerate(train_loader):
            x = x.to(DEVICE)
            optimizer.zero_grad()
            loss = model.training_step(x)
            overall_loss += loss.item()
            loss.backward()
            optimizer.step()
        
        avg_loss = overall_loss / (len(train_loader) * batch_size)
        new_row = pd.DataFrame({'Dataset': [config['dataset']],
                                'Model': [config['model_name']],
                                'Seed': [config['seed']],
                                'Epochs': [epoch],
                                'Learning Rate': [config['learning_rate']],
                                'Batch Size': [config['batch_size']],
                                'Loss': [avg_loss]})
        loss_dataset = pd.concat([loss_dataset, new_row], ignore_index=True)
        print("\tEpoch", epoch + 1, "complete!", "\tAverage Loss: ", avg_loss)
    
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Disable gradient calculation
        test_loss = 0
        for x_test, _ in test_loader:
            x_test = x_test.to(DEVICE)
            test_loss += model.validation_step(x_test).item()
        
        avg_test_loss = test_loss / (len(test_loader) * batch_size)
        scheduler.step(avg_test_loss)  # Update the learning rate based on the test loss
        new_row = pd.DataFrame({'Dataset': [config['dataset']],
                                'Model': [config['model_name']],
                                'Seed': [config['seed']],
                                'Epochs': ['Test'],
                                'Learning Rate': [config['learning_rate']],
                                'Batch Size': [config['batch_size']],
                                'Loss': [avg_test_loss]})
        loss_dataset = pd.concat([loss_dataset, new_row], ignore_index=True)
    
    
    # Save the model
    name = f"{config['dataset']}_{config['model_name']}_{config['learning_rate']}_{config['batch_size']}_{config['num_epochs']}_{config['seed']}.pth"
    print(name)
    path = config['path'] + name
    torch.save(model.state_dict(), path)

loss_dataset.to_csv('models/checkpoints/SMALLAE/losses.csv', index=False)


In [8]:
import os
import torch
import torchvision.transforms as transforms
from torch.optim import Adam
from utils.dataloaders.dataloader_mnist_single import DataLoaderMNIST
from models.definitions.PocketAutoencoder import PocketAutoencoder
from models.definitions.PocketAutoencoder import PocketAutoencoder

import os

os.chdir("/Users/federicoferoggio/Documents/vs_code/latent-communication")

# Assuming you have already defined and initialized test_loader and DEVICE

losses_per_class = pd.DataFrame(columns=["Model", "Class", "Loss"])

DataLoaders = DataLoaderMNIST
batch_size = 64
augmentations = [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]

dataloader = DataLoaders(batch_size=batch_size, transformation=augmentations)

test_loader = dataloader.get_test_loader()
train_loader = dataloader.get_train_loader()

for file in os.listdir("models/checkpoints/SMALLAE/MNIST/"):
    if file.endswith(".pth"):  # Check if the file is a PyTorch model file
        # Load the model
        model = PocketAutoencoder()
        model.load_state_dict(torch.load("models/checkpoints/SMALLAE/MNIST/" + file))
        print("Model loaded:", file)

        # Iterate through each class (0 to 9)
        for n in range(10):
            desired_class = n  # Specify the class you want to filter
            filtered_samples = []

            # Filter samples from the test loader based on the desired class
            for data, label in test_loader:
                indices = torch.nonzero(label == desired_class, as_tuple=False)
                if indices.numel() > 0:
                    for idx in indices:
                        filtered_samples.append((data[idx], label[idx]))

            test_loss_filtered = 0  # Initialize test loss for the current class

            with torch.no_grad():  # Disable gradient calculation
                for x_test, _ in filtered_samples:
                    test_loss_filtered += model.validation_step(x_test).item()

            # Calculate average reconstruction loss for the current class
            if len(filtered_samples) > 0:
                avg_loss = test_loss_filtered / len(filtered_samples)
            else:
                avg_loss = (
                    0  # Handle the case when there are no samples for the current class
                )

            print("\tTest Loss for class", n, ":", avg_loss)
            # Concatenate the results to the DataFrame
            losses_per_class = pd.concat(
                [
                    losses_per_class,
                    pd.DataFrame({"Model": [file], "Class": [n], "Loss": [avg_loss]}),
                ],
                ignore_index=True,
            )

# Save the results to a CSV file
losses_per_class.to_csv("models/checkpoints/SMALLAE/losses_per_class.csv", index=False)

NameError: name 'seed' is not defined

In [9]:
import os
import torch
import pandas as pd
import torchvision.transforms as transforms
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
from models.definitions.PocketAutoencoder import PocketAutoencoder

# Assuming you have already defined and initialized test_loader and DEVICE

os.chdir("/Users/federicoferoggio/Documents/vs_code/latent-communication")
losses_per_class = pd.DataFrame(columns=["Model", "Class", "MSE", "SSIM", "PSNR"])


# Function to calculate SSIM and PSNR
def calculate_ssim_psnr(original, reconstructed, data_range=1.0):
    original_np = original.cpu().numpy().squeeze()
    reconstructed_np = reconstructed.cpu().numpy().squeeze()
    ssim_value = ssim(original_np, reconstructed_np, data_range=data_range)
    psnr_value = psnr(original_np, reconstructed_np, data_range=data_range)
    return ssim_value, psnr_value


# Iterate through each model file in the specified directory
for file in os.listdir("models/checkpoints/SMALLAE/MNIST/"):
    if file.endswith(".pth"):  # Check if the file is a PyTorch model file
        # Load the model
        model = PocketAutoencoder()
        model.load_state_dict(
            torch.load("models/checkpoints/SMALLAE/MNIST/" + file, map_location=DEVICE),
            strict=False,
        )
        model.to(DEVICE)
        print("Model loaded:", file)

        # Iterate through each class (0 to 9)
        for n in range(10):
            desired_class = n  # Specify the class you want to filter
            filtered_samples = []

            # Filter samples from the test loader based on the desired class
            for data, label in test_loader:
                indices = torch.nonzero(label == desired_class, as_tuple=False)
                if indices.numel() > 0:
                    for idx in indices:
                        filtered_samples.append((data[idx], label[idx]))

            mse_loss_filtered = 0  # Initialize MSE loss for the current class
            ssim_loss_filtered = 0  # Initialize SSIM loss for the current class
            psnr_loss_filtered = 0  # Initialize PSNR loss for the current class

            with torch.no_grad():  # Disable gradient calculation
                for x_test, _ in filtered_samples:
                    x_test = x_test.to(DEVICE)
                    x_reconstructed = model(x_test)
                    mse_loss_filtered += torch.nn.functional.mse_loss(
                        x_reconstructed, x_test
                    ).item()
                    ssim_value, psnr_value = calculate_ssim_psnr(
                        x_test, x_reconstructed
                    )
                    ssim_loss_filtered += ssim_value
                    psnr_loss_filtered += psnr_value

            # Calculate average losses for the current class
            num_samples = len(filtered_samples)
            if num_samples > 0:
                avg_mse_loss = mse_loss_filtered / num_samples
                avg_ssim_loss = ssim_loss_filtered / num_samples
                avg_psnr_loss = psnr_loss_filtered / num_samples
            else:
                avg_mse_loss = (
                    0  # Handle the case when there are no samples for the current class
                )
                avg_ssim_loss = 0
                avg_psnr_loss = 0

            print(
                f"\tMetrics for class {n} - MSE: {avg_mse_loss}, SSIM: {avg_ssim_loss}, PSNR: {avg_psnr_loss}"
            )
            # Concatenate the results to the DataFrame
            losses_per_class = pd.concat(
                [
                    losses_per_class,
                    pd.DataFrame(
                        {
                            "Model": [file],
                            "Class": [n],
                            "MSE": [avg_mse_loss],
                            "SSIM": [avg_ssim_loss],
                            "PSNR": [avg_psnr_loss],
                        }
                    ),
                ],
                ignore_index=True,
            )

# Save the results to a CSV file
losses_per_class.to_csv("models/checkpoints/SMALLAE/MNIST/more_metrics.csv", index=False)

Model loaded: MNIST_SMLLAE_0.001_128_5_4.pth


NameError: name 'test_loader' is not defined

In [3]:
order_check = []

pth_files = [file for file in os.listdir('models/checkpoints/SMALLAE/MNIST/') if os.path.isfile(os.path.join('models/checkpoints/SMALLAE/MNIST/', file)) and file.endswith('.pth')]

for file in tqdm(pth_files):
    model_order = []
    latent_space_dict = {}
    model1 = PocketAutoencoder()
    model1.load_state_dict(torch.load(f'models/checkpoints/SMALLAE/MNIST/{file}'))
    model1.to(DEVICE)
    model1.eval()
    
    DataLoaders = DataLoaderMNIST
    indices = range(60000)  # Assuming you want to use the first 40,000 images
    data_loader = DataLoaderMNIST(128, augmentations, indices=indices, seed=0)
    train_loader = data_loader.get_train_loader()

    # Get all images from test_loader and convert them to latent space
    for batch_idx, (images, labels) in enumerate(train_loader):
        if batch_idx == len(train_loader) - 1:
            break  # Skip the last batch
        images = images.to(DEVICE)
        with torch.no_grad():
            latent_space = model1.get_latent_space(images)

        # Convert tensor to numpy array
        latent_space_np = latent_space.detach().cpu().numpy()
        labels_np = labels.detach().cpu().numpy()
        
        # Store in dictionary
        for numm, idx in enumerate(range(batch_idx*128, (batch_idx+1)*128)):
            latent_space_dict[idx] = (latent_space_np[numm], labels_np[numm])
            model_order.append(labels_np[numm])
    order_check.append(model_order)
    torch.save(latent_space_dict, 'models/checkpoints/SMALLAE/MNIST/LATENTS/' + str(file).replace('.pth', '_latent_space.pth'))

## Check they are in the same order
for i in range(len(order_check)-1):
    if order_check[i] != order_check[i+1]:
        print('Order is not the same')
        break

print(len(order_check))

NameError: name 'tqdm' is not defined