In [6]:
import copy
import csv
import random
import time

import torch
import torch.nn as nn
import torch.optim as optim

import AutoDecoder, AD_Trainer
import utils
from evaluate import evaluate_model

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


## Create DataLoaders

In [3]:
# Unpack the four objects returned by utils.create_dataloaders for the "dataset"
# directory with a batch size of 64 (names suggest train/test dataset + loader pairs).
(train_ds, train_dl, test_ds, test_dl) = utils.create_dataloaders(
    data_path="dataset",
    batch_size=64,
)

## Train Auto Decoder

In [10]:
# Decoder search space. Every entry is a *template* MLP that maps a latent vector to
# 784 output values; the templates are deep-copied below so that no two runs share
# parameters.
architectures = [
    # 1. Simple Architecture (Latent space: 64)
    nn.Sequential(
        nn.Linear(64, 128),
        nn.ReLU(),
        nn.Linear(128, 256),
        nn.ReLU(),
        nn.Linear(256, 512),
        nn.ReLU(),
        nn.Linear(512, 784)
    ),

    # 2. Deeper Architecture (Latent space: 32)
    nn.Sequential(
        nn.Linear(32, 128),
        nn.LeakyReLU(),
        nn.Linear(128, 256),
        nn.LeakyReLU(),
        nn.Linear(256, 512),
        nn.LeakyReLU(),
        nn.Linear(512, 1024),
        nn.LeakyReLU(),
        nn.Linear(1024, 784)
    ),

    # 3. Wider Architecture (Latent space: 128)
    nn.Sequential(
        nn.Linear(128, 512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, 1024),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(1024, 784)
    ),

    # 4. Progressive Architecture (Latent space: 16)
    nn.Sequential(
        nn.Linear(16, 64),
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.ReLU(),
        nn.Linear(128, 256),
        nn.ReLU(),
        nn.Linear(256, 512),
        nn.ReLU(),
        nn.Linear(512, 784)
    ),

    # 5. Bottlenecked Architecture (Latent space: 10)
    nn.Sequential(
        nn.Linear(10, 64),
        nn.ReLU(),
        nn.Linear(64, 256),
        nn.ReLU(),
        nn.Linear(256, 784)
    )
]

# Latent size of each template above, in the same order, and the learning rates to try.
arch_latent_dims = [64, 32, 128, 16, 10]
lr_grid = [0.001, 0.0005, 0.0001, 0.002, 0.005]

# Guard against the two lists drifting apart: the latent size must match the input
# width of each template's first Linear layer.
assert len(arch_latent_dims) == len(architectures)
assert all(arch[0].in_features == dim for arch, dim in zip(architectures, arch_latent_dims))

# Full grid: run i uses architecture i // len(lr_grid) with learning rate i % len(lr_grid).
# (Previously each learning rate was repeated in lock-step with the architectures, so
# architecture k was only ever trained with learning rate k.)
latent_dims = [dim for dim in arch_latent_dims for _ in lr_grid]
learning_rates = [lr for _ in architectures for lr in lr_grid]

# Each decoder gets its own deep copy of the template. Passing the same nn.Sequential
# object to several decoders would make them share weights, so every run after the first
# would start from the previous run's trained parameters. The copies start from identical
# initial weights, which keeps the learning-rate comparison fair.
auto_decoders = [
    AutoDecoder.AutoDecoder(copy.deepcopy(arch))
    for arch in architectures
    for _ in lr_grid
]

# One trainer per (decoder, latent size, learning rate) combination, all on train_dl.
trainers = [
    AD_Trainer.AD_Trainer(decoder=decoder, dataloader=train_dl, latent_dim=latent_dim, device=device, lr=lr)
    for decoder, latent_dim, lr in zip(auto_decoders, latent_dims, learning_rates)
]

In [None]:
# Train every trainer, fit test-set latents for its decoder, and append the per-epoch
# training losses plus the final test loss to a CSV file (one row per run).

NUM_TRAIN_EPOCHS = 200  # epochs per trainer; also determines the number of CSV loss columns
NUM_EVAL_EPOCHS = 100   # epochs handed to evaluate_model when fitting the test latents

num_test_samples = len(test_dl.dataset)

# The loop below pairs trainers and decoders by position.
assert len(auto_decoders) == len(trainers), "expected one decoder per trainer"

# One trainable latent matrix (num_test_samples x latent_dim) and one Adam optimizer per
# trainer. The latent size is read from each trainer directly, so this keeps working if
# the number or ordering of trainers changes.
latents_list = [
    torch.nn.Parameter(torch.randn(num_test_samples, trainer.latent_dim).to(device))
    for trainer in trainers
]
optimizers = [optim.Adam([latents], lr=1e-3) for latents in latents_list]

# Save results to a CSV file
csv_file_path = 'results.csv'

# Write the header first; mode='w' truncates any results file from an earlier run.
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    header = ['Index'] + [f'Epoch {i+1} Loss' for i in range(NUM_TRAIN_EPOCHS)] + ['Final Test Loss']
    writer.writerow(header)

# Main training and evaluation loop
for index, trainer in enumerate(trainers):
    # perf_counter is monotonic, so the measured duration cannot be skewed by clock changes.
    start_time = time.perf_counter()
    # The return value is spliced into the CSV row below, so it is expected to be an
    # iterable of losses (presumably one per epoch -- verify against AD_Trainer.train).
    train_loss = trainer.train(num_epochs=NUM_TRAIN_EPOCHS)
    elapsed_time = time.perf_counter() - start_time
    print(f"Trainer {index} has finished training in {elapsed_time:.2f} seconds.")

    start_time = time.perf_counter()
    test_loss = evaluate_model(
        model=auto_decoders[index],
        test_dl=test_dl,
        opt=optimizers[index],
        latents=latents_list[index],
        epochs=NUM_EVAL_EPOCHS,
        device=device,
    )
    elapsed_time = time.perf_counter() - start_time
    print(f"AD {index} has finished test evaluation in {elapsed_time:.2f} seconds.")

    # Unpacking accepts any iterable of losses, not only a list.
    row = [index, *train_loss, test_loss]

    # Append after every run so finished results survive a crash or interruption later on.
    with open(csv_file_path, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(row)

print(f"Results saved to {csv_file_path}.")

## Sample specific vectors

In [None]:
# Larger decoder for the sampling experiment: 256 latent features in, 784 values out
# (the outputs are viewed as 1x28x28 images further down in the notebook).
arch = nn.Sequential(
    # 256 -> 512 with batch norm and light dropout
    nn.Linear(256, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(0.25),
    # 512 -> 1024 with batch norm
    nn.Linear(512, 1024), nn.BatchNorm1d(1024), nn.ReLU(),
    # 1024 -> 2048 with batch norm
    nn.Linear(1024, 2048), nn.BatchNorm1d(2048), nn.ReLU(),
    # 2048 -> 4096, no normalisation
    nn.Linear(2048, 4096), nn.ReLU(),
    # output projection
    nn.Linear(4096, 784),
)

# Wrap the network in the project's AutoDecoder and build its trainer on train_dl.
decoder = AutoDecoder.AutoDecoder(arch)
trainer = AD_Trainer.AD_Trainer(
    decoder=decoder,
    dataloader=train_dl,
    latent_dim=256,
    device=device,
    lr=5e-3,
)

In [17]:
# Run the trainer for 200 epochs.
trainer.train(num_epochs=200)

# Fresh trainable latent codes, one row per test sample, drawn from a standard normal
# and moved to the active device; Adam updates only these latents.
num_test_samples = len(test_dl.dataset)
latents = nn.Parameter(
    torch.randn(num_test_samples, trainer.latent_dim).to(device)
)
opt = optim.Adam(params=[latents], lr=1e-3)

Epoch [1/200], Loss: 0.0718
Epoch [2/200], Loss: 0.0713
Epoch [3/200], Loss: 0.0686
Epoch [4/200], Loss: 0.0682
Epoch [5/200], Loss: 0.0726
Epoch [6/200], Loss: 0.0740
Epoch [7/200], Loss: 0.0714
Epoch [8/200], Loss: 0.0662
Epoch [9/200], Loss: 0.0631
Epoch [10/200], Loss: 0.0636
Epoch [11/200], Loss: 0.0643
Epoch [12/200], Loss: 0.0651
Epoch [13/200], Loss: 0.0648
Epoch [14/200], Loss: 0.0687
Epoch [15/200], Loss: 0.0705
Epoch [16/200], Loss: 0.0666
Epoch [17/200], Loss: 0.0629
Epoch [18/200], Loss: 0.0643
Epoch [19/200], Loss: 0.0640
Epoch [20/200], Loss: 0.0629
Epoch [21/200], Loss: 0.0645
Epoch [22/200], Loss: 0.0647
Epoch [23/200], Loss: 0.0669
Epoch [24/200], Loss: 0.0721
Epoch [25/200], Loss: 0.0681
Epoch [26/200], Loss: 0.0658
Epoch [27/200], Loss: 0.0668
Epoch [28/200], Loss: 0.0650
Epoch [29/200], Loss: 0.0656
Epoch [30/200], Loss: 0.0694
Epoch [31/200], Loss: 0.0635
Epoch [32/200], Loss: 0.0639
Epoch [33/200], Loss: 0.0656
Epoch [34/200], Loss: 0.0624
Epoch [35/200], Loss: 0

In [19]:
# Fit the test latents against the trained decoder for 1000 epochs and report the loss.
test_loss = evaluate_model(
    model=decoder,
    test_dl=test_dl,
    opt=opt,
    latents=latents,
    epochs=1000,
    device=device,
)
print(f"AD has finished test evaluation with a test loss of {test_loss}.")

AD has finished test evaluation with a test loss of 0.1657548677176237.


In [24]:
# Decode five fitted test latents and five random latents into images so the two sets
# can be compared visually.

# Seed Python's RNG so the same five test latents are picked on every run.
# Note: this does not seed torch, so the random latents drawn below still vary between runs.
random.seed(6)
sampled_indices = random.sample(range(len(latents)), 5)

# Pick the fitted latent vectors at those indices. They are detached because they are
# only decoded here, not optimised any further.
sampled_latents = [latents[i].detach() for i in sampled_indices]

# Stack into a (5, latent_dim) batch and draw a standard-normal batch of the same shape.
sampled_latents_tensor = torch.stack(sampled_latents)
random_latents_tensor = torch.randn_like(sampled_latents_tensor)

print("Sampled Vectors Shape:", sampled_latents_tensor.shape)  # (5, latent_dim)
print("Random Vectors Shape:", random_latents_tensor.shape)  # (5, latent_dim)

# Switch to inference mode before decoding: the network contains BatchNorm1d and Dropout
# layers, which in training mode would normalise this 5-sample batch with its own
# statistics, update the running statistics, and randomly zero activations.
# Assumes AutoDecoder is an nn.Module (it wraps an nn.Sequential and is callable).
# The decoder is left in eval mode afterwards.
decoder.eval()
with torch.no_grad():  # no gradients are needed for image generation
    sampled_test_images = decoder(sampled_latents_tensor).view(-1, 1, 28, 28)
    random_test_images = decoder(random_latents_tensor).view(-1, 1, 28, 28)

print("Sampled Images Shape:", sampled_test_images.shape)  # (5, 1, 28, 28)
utils.save_images(sampled_test_images, "sampled_test_images.png")
utils.save_images(random_test_images, "random_test_images.png")

Sampled Vectors Shape: torch.Size([5, 256])
Random Vectors Shape: torch.Size([5, 256])
Sampled Images Shape: torch.Size([5, 1, 28, 28])
