In [2]:
import csv
import itertools
import random
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

import utils
from evaluate import evaluate_model
from VAD_Trainer import VAD_Trainer
from VariationalAutoDecoder import VariationalAutoDecoder as VAD

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


## Create DataLoaders

In [3]:
# Build the train/test datasets and their DataLoaders (batches of 64) from data_path "dataset".
train_ds, train_dl, test_ds, test_dl = utils.create_dataloaders(
    data_path="dataset",
    batch_size=64,
)

## Train Auto Decoder

In [3]:
# Hyper-parameter grid: one decoder and one trainer per (latent_dim, beta) pair.
latent_dims = [16, 32, 64, 128]
betas = [1e5, 5e5, 1e6, 5e6]

# Materialize the grid once so VADs and trainers share exactly the same ordering.
grid = list(itertools.product(latent_dims, betas))

VADs = [VAD(latent_dim=dim, device=device) for dim, _ in grid]
trainers = [
    VAD_Trainer(var_decoder=vad, dataloader=train_dl, latent_dim=dim, beta=beta, device=device, lr=1e-3)
    for vad, (dim, beta) in zip(VADs, grid)
]

In [None]:
# Sweep over every trainer: train it, re-evaluate it on the training set, and
# append one CSV row per trainer so partial results survive an interrupted run.

# Epoch counts live in one place: the CSV header width must match the number of
# per-epoch losses, so training and header both read NUM_EPOCHS.
NUM_EPOCHS = 500
EVAL_EPOCHS = 500

csv_file_path = 'results_VAD_omri.csv'

# Write the header first; mode='w' truncates results left over from a previous run.
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    header = ['Index'] + [f'Epoch {i+1} Loss' for i in range(NUM_EPOCHS)] + ['Final Train Loss']
    writer.writerow(header)

# Main training and evaluation loop
for index, trainer in enumerate(trainers):
    # Adam over this trainer's latents; it is handed to evaluate_model below.
    optimizer = optim.Adam([trainer.latents], lr=1e-3)

    # perf_counter is monotonic, so the interval cannot be skewed by clock adjustments.
    start_time = time.perf_counter()
    # presumably returns one loss per epoch (it is concatenated into the CSV row) — TODO confirm
    train_loss = trainer.train(num_epochs=NUM_EPOCHS)
    elapsed_time = time.perf_counter() - start_time
    print(f"Trainer {index} has finished training in {elapsed_time:.2f} seconds.")

    start_time = time.perf_counter()
    # evaluate_model is given train_dl, so this is a loss on the *training* data,
    # matching the 'Final Train Loss' column.
    final_train_loss = evaluate_model(model=VADs[index], test_dl=train_dl, opt=optimizer, latents=trainer.latents, epochs=EVAL_EPOCHS, device=device)
    elapsed_time = time.perf_counter() - start_time
    print(f"AD {index} has finished train evaluation in {elapsed_time:.2f} seconds.")

    # Row layout mirrors the header: index, per-epoch losses, final train loss.
    row = [index] + list(train_loss) + [final_train_loss]

    # Append after each trainer instead of writing once at the end.
    with open(csv_file_path, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(row)

print(f"Results saved to {csv_file_path}.")

In [5]:
# One t-SNE plot per trained configuration, using latents passed through
# the matching decoder's reparameterize step.
for i, trainer in enumerate(trainers):
    latents = VADs[i].reparameterize(trainer.latents)
    utils.plot_tsne(train_ds, latents, f"tsne_omri_{i}")

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

<Figure size 800x600 with 0 Axes>

In [None]:
# NOTE(review): `trainers_ft` is not defined anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel unless it is created elsewhere — TODO confirm
# where it should come from, or drop this cell.
# NOTE(review): unlike the other t-SNE cells, the raw `.latents` are plotted here without
# going through `reparameterize` — verify this is intended.
for i in range(len(trainers_ft)):
    utils.plot_tsne(train_ds, trainers_ft[i].latents, f"tsne_kl_ft_{i}")

## Best model

In [None]:
# Retrain a single configuration (latent_dim=128, beta=5e6) for 1000 epochs.
VAD_best = VAD(latent_dim=128, device=device)
trainer_best = VAD_Trainer(
    var_decoder=VAD_best,
    dataloader=train_dl,
    latent_dim=128,
    beta=5e6,
    device=device,
    lr=0.001,
)
# Bind the return value to `_` so the cell does not echo it.
_ = trainer_best.train(num_epochs=1000)

In [8]:
# Evaluate the best model on the training data, reusing the latents its trainer learned.
# A new Adam optimizer over those latents is passed in because evaluate_model takes one.
opt = optim.Adam([trainer_best.latents], lr=1e-3)
evaluate_loss = evaluate_model(
    model=VAD_best,
    test_dl=train_dl,
    opt=opt,
    latents=trainer_best.latents,
    epochs=1000,
    device=device,
)

In [9]:
# Show the loss returned by evaluate_model for the best model on the training data.
print(evaluate_loss)

0.2757660485804081


In [10]:
# Pass the best trainer's latents through the decoder's reparameterize step
# and plot them with t-SNE against the training dataset.
latents = VAD_best.reparameterize(trainer_best.latents)
utils.plot_tsne(train_ds, latents, "tsne_omri_best?")

<Figure size 800x600 with 0 Axes>

## Sample specific vectors

In [5]:
# Fresh, randomly initialized (mu, sigma) latents for every test sample, plus the
# optimizer that will fit them during test-time evaluation.
num_test_samples = len(test_dl.dataset)
latent_shape = (num_test_samples, VAD_best.latent_dim)

# Plain tensors are enough here: nn.Parameter below creates its own leaf that requires
# grad, so requires_grad on mu/sigma would never receive gradients anyway.
mu_test = torch.randn(latent_shape, device=device)
sigma_test = torch.randn(latent_shape, device=device)

# Stacked on dim=1 -> shape (num_test_samples, 2, latent_dim): index 0 is mu, index 1 is sigma.
# The inputs are already on `device`, so no trailing .to(device) is needed; if that call
# ever performed a real transfer it would return a non-leaf tensor the optimizer rejects.
test_latents = torch.nn.Parameter(torch.stack([mu_test, sigma_test], dim=1))
opt = optim.Adam([test_latents], lr=1e-3)

In [7]:
# Run evaluate_model for 1000 epochs on the test loader with the test latents
# and their optimizer, then report the resulting loss.
test_loss = evaluate_model(
    model=VAD_best,
    test_dl=test_dl,
    opt=opt,
    latents=test_latents,
    epochs=1000,
    device=device,
)
print(f"AD has finished test evaluation with a test loss of {test_loss}.")

AD has finished test evaluation with a test loss of 0.272798553109169.


In [10]:
# Pass the test latents through the decoder's reparameterize step
# and plot them with t-SNE against the test dataset.
final_test_latents = VAD_best.reparameterize(test_latents)
utils.plot_tsne(test_ds, final_test_latents, "tsne_test_omri")

<Figure size 800x600 with 0 Axes>

In [13]:
# Decode a handful of test latents and, for comparison, equally shaped latents
# drawn from heavily scaled Gaussian noise; save both image grids.
NUM_SAMPLED_IMAGES = 5

# Randomly sample indices from the test latents. The range follows the actual number
# of test latents instead of a hard-coded 1000, so it can neither index out of bounds
# nor ignore part of the set (identical draw when there are exactly 1000).
random.seed(6)
sampled_indices = random.sample(range(test_latents.shape[0]), NUM_SAMPLED_IMAGES)
sampled_test_latents = test_latents[sampled_indices]

# Noise with the same shape as the sampled latents, scaled by 10000.
random_latents_tensor = 10000 * torch.randn_like(sampled_test_latents)

# Each decoded output is reshaped to (batch, 1, 28, 28) before saving.
sampled_test_images = VAD_best(sampled_test_latents).view(-1, 1, 28, 28)
random_test_images = VAD_best(random_latents_tensor).view(-1, 1, 28, 28)

utils.save_images(sampled_test_images, "sampled_test_images_VAD_Omri.png")
utils.save_images(random_test_images, "random_test_images_VAD_Omri.png")

## Interpolation

In [None]:
# Linearly interpolate between two latents of the best model in 7 steps and decode each step.

# NOTE(review): `latents_best` was never assigned in this notebook, so the cell failed
# with NameError on a fresh kernel. The best trainer's latents are assumed to be what
# was meant, since VAD_best is called on the same kind of raw (mu, sigma) latents in
# the sampling cell — TODO confirm.
latents_best = trainer_best.latents

random.seed(44)
sampled_indices = random.sample(range(len(latents_best)), 2)
sampled_latents = [latents_best[i] for i in sampled_indices]

weights = np.linspace(0, 1, 7)
# Convert each weight to a plain Python float so the arithmetic stays in torch
# instead of mixing NumPy float64 scalars with tensors.
# w = 0 yields the second sampled latent, w = 1 the first.
interpolated_latents = [
    w * sampled_latents[0] + (1 - w) * sampled_latents[1]
    for w in map(float, weights)
]
interpolated_latents_tensor = torch.stack(interpolated_latents)

# Decoded output is reshaped to (batch, 1, 28, 28) before saving.
interpolated_images = VAD_best(interpolated_latents_tensor).view(-1, 1, 28, 28)
utils.save_images(interpolated_images, "interpolated_images.png")