In [None]:
import copy
import csv
import random
import time

import torch
import torch.nn as nn
import torch.optim as optim

import AutoDecoder, AD_Trainer
import utils
from evaluate import evaluate_model

# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

## Create DataLoaders

In [None]:
# Build the train/test datasets and their loaders from the local "dataset" directory.
train_ds, train_dl, test_ds, test_dl = utils.create_dataloaders(
    data_path="dataset",
    batch_size=64,
)

## Train Auto Decoder

In [None]:
# Convolution settings shared by the candidate decoders. The spatial effect
# noted on each line follows from the ConvTranspose2d / Conv2d output-size formulas.
upsample_k3 = dict(kernel_size=3, stride=2, padding=1, output_padding=1)  # H, W -> 2H, 2W
upsample_k4 = dict(kernel_size=4, stride=2, padding=1)                    # H, W -> 2H, 2W
keep_size_k3 = dict(kernel_size=3, stride=1, padding=1)                   # H, W unchanged

# Candidate decoder networks, in the same order as `latent_dims` below.
# Every network maps a (batch, latent_dim) input to a (batch, 1, 28, 28) output.
architectures = [
    # Latent 64: 7x7 -> 14x14 -> 28x28, ReLU only.
    nn.Sequential(
        nn.Linear(64, 7 * 7 * 64),
        nn.ReLU(),
        nn.Unflatten(1, (64, 7, 7)),
        nn.ConvTranspose2d(64, 32, **upsample_k3),
        nn.ReLU(),
        nn.ConvTranspose2d(32, 16, **upsample_k3),
        nn.ReLU(),
        nn.ConvTranspose2d(16, 1, **keep_size_k3),
    ),

    # Latent 32: 4x4 -> 8x8 -> 14x14 -> 28x28, finished by a regular Conv2d.
    nn.Sequential(
        nn.Linear(32, 4 * 4 * 128),
        nn.ReLU(),
        nn.Unflatten(1, (128, 4, 4)),
        nn.ConvTranspose2d(128, 64, **upsample_k4),
        nn.ReLU(),
        # padding=2 (instead of 1) is what turns 8x8 into 14x14 rather than 16x16.
        nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=2),
        nn.ReLU(),
        nn.ConvTranspose2d(32, 16, **upsample_k4),
        nn.ReLU(),
        nn.Conv2d(16, 1, **keep_size_k3),
    ),

    # Latent 128: 7x7 -> 14x14 -> 28x28 with BatchNorm after each upsampling step.
    nn.Sequential(
        nn.Linear(128, 7 * 7 * 256),
        nn.ReLU(),
        nn.Unflatten(1, (256, 7, 7)),
        nn.ConvTranspose2d(256, 128, **upsample_k3),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.ConvTranspose2d(128, 64, **upsample_k3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.ConvTranspose2d(64, 1, **keep_size_k3),
    ),

    # Latent 16: 7x7 -> 14x14 -> 28x28 with 4x4 kernels, narrow channels.
    nn.Sequential(
        nn.Linear(16, 7 * 7 * 32),
        nn.ReLU(),
        nn.Unflatten(1, (32, 7, 7)),
        nn.ConvTranspose2d(32, 16, **upsample_k4),
        nn.ReLU(),
        nn.ConvTranspose2d(16, 8, **upsample_k4),
        nn.ReLU(),
        nn.ConvTranspose2d(8, 1, **keep_size_k3),
    ),

    # Latent 256: 7x7 -> 14x14 -> 28x28 with 4x4 kernels, wide channels.
    nn.Sequential(
        nn.Linear(256, 7 * 7 * 512),
        nn.ReLU(),
        nn.Unflatten(1, (512, 7, 7)),
        nn.ConvTranspose2d(512, 256, **upsample_k4),
        nn.ReLU(),
        nn.ConvTranspose2d(256, 128, **upsample_k4),
        nn.ReLU(),
        nn.ConvTranspose2d(128, 1, **keep_size_k3),
    ),
]



# Latent size expected by each entry of `architectures` (matches the in_features
# of each network's first Linear layer).
latent_dims = [64, 32, 128, 16, 256]

# Wrap every network in an AutoDecoder, then give each one its own trainer.
auto_decoders = list(map(AutoDecoder.AutoDecoder, architectures))
trainers = [
    AD_Trainer.AD_Trainer(
        decoder=auto_decoder,
        dataloader=train_dl,
        latent_dim=latent_dim,
        device=device,
        lr=0.005,
    )
    for latent_dim, auto_decoder in zip(latent_dims, auto_decoders)
]

In [None]:
# Train every candidate decoder, run evaluate_model on the test loader, and log
# the per-epoch training losses plus the final test loss to a CSV file.
num_epochs = 200   # training epochs per model; also the number of per-epoch CSV columns
eval_epochs = 100  # epochs handed to evaluate_model
num_test_samples = len(test_dl.dataset)

# One (num_test_samples, latent_dim) latent Parameter and one Adam optimizer per
# trainer. The count is taken from `trainers` itself rather than a hard-coded 5,
# so adding or removing an architecture cannot desynchronise the lists.
latents_list = [
    torch.nn.Parameter(torch.randn(num_test_samples, trainer.latent_dim).to(device))
    for trainer in trainers
]
optimizers = [optim.Adam([latents], lr=1e-3) for latents in latents_list]

csv_file_path = 'results.csv'

# Start a fresh file containing only the header row.
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    header = ['Index'] + [f'Epoch {i+1} Loss' for i in range(num_epochs)] + ['Final Test Loss']
    writer.writerow(header)

for index, trainer in enumerate(trainers):
    start_time = time.time()
    # NOTE(review): the row below assumes train() returns a list with one loss
    # per epoch -- confirm against AD_Trainer.
    train_loss = trainer.train(num_epochs=num_epochs)
    end_time = time.time()

    elapsed_time = end_time - start_time
    print(f"Trainer {index} has finished training in {elapsed_time:.2f} seconds.")

    start_time = time.time()
    test_loss = evaluate_model(
        model=auto_decoders[index],
        test_dl=test_dl,
        opt=optimizers[index],
        latents=latents_list[index],
        epochs=eval_epochs,
        device=device,
    )
    end_time = time.time()

    elapsed_time = end_time - start_time
    print(f"AD {index} has finished test evaluation in {elapsed_time:.2f} seconds.")

    row = [index] + train_loss + [test_loss]

    # Append after every model so finished results survive an interrupted run.
    with open(csv_file_path, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(row)

print(f"Results saved to {csv_file_path}.")

## Fine Tuning 

In [None]:
def _fine_tune_decoder(use_leaky_relu, use_dropout):
    """Build one variant of the 128-latent BatchNorm decoder.

    Args:
        use_leaky_relu: when true, the activation after each BatchNorm is
            LeakyReLU(0.2); otherwise it is ReLU. The activation after the
            first Linear layer is always ReLU.
        use_dropout: when true, Dropout(0.3) follows each of those activations.

    Returns:
        An nn.Sequential mapping (batch, 128) to (batch, 1, 28, 28).
    """
    layers = [
        nn.Linear(128, 7 * 7 * 256),
        nn.ReLU(),
        nn.Unflatten(1, (256, 7, 7)),
    ]
    # Two upsampling stages: 7x7 -> 14x14 -> 28x28.
    for in_channels, out_channels in ((256, 128), (128, 64)):
        layers.append(
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
        )
        layers.append(nn.BatchNorm2d(out_channels))
        layers.append(nn.LeakyReLU(0.2) if use_leaky_relu else nn.ReLU())
        if use_dropout:
            layers.append(nn.Dropout(0.3))
    layers.append(nn.ConvTranspose2d(64, 1, kernel_size=3, stride=1, padding=1))
    return nn.Sequential(*layers)


# Baseline, LeakyReLU, Dropout, and LeakyReLU + Dropout, in that order.
archs = [
    _fine_tune_decoder(use_leaky_relu=False, use_dropout=False),
    _fine_tune_decoder(use_leaky_relu=True, use_dropout=False),
    _fine_tune_decoder(use_leaky_relu=False, use_dropout=True),
    _fine_tune_decoder(use_leaky_relu=True, use_dropout=True),
]

# Hyper-parameter grid: every architecture in `archs` is trained once per
# learning rate, so run `i` uses architecture `i // 3` and learning rate
# `learning_rate_grid[i % 3]`.
learning_rate_grid = [0.001, 0.0001, 0.005]

# Each run gets its own deep copy of the network. Wrapping the same nn.Sequential
# instance several times would make the runs share (and keep training) one set of
# weights -- assuming AutoDecoder keeps a reference to the module it is given.
auto_decoders = [
    AutoDecoder.AutoDecoder(copy.deepcopy(arch))
    for arch in archs
    for _ in learning_rate_grid
]

# Architecture-major ordering, matching `auto_decoders`. The previous lr-major
# list (each rate repeated 4 times) paired most architectures with the wrong rates.
# NOTE: this changes which (architecture, learning rate) pair each CSV row index
# refers to compared with results produced before this fix.
learning_rates = [lr for _ in archs for lr in learning_rate_grid]

trainers = [
    AD_Trainer.AD_Trainer(
        decoder=auto_decoder,
        dataloader=train_dl,
        latent_dim=128,
        device=device,
        lr=lr,
    )
    for auto_decoder, lr in zip(auto_decoders, learning_rates)
]

In [None]:
# Train every (architecture, learning rate) run, run evaluate_model on the test
# loader, and log the per-epoch training losses plus the final test loss to CSV.
num_epochs = 200   # training epochs per run; also the number of per-epoch CSV columns
eval_epochs = 100  # epochs handed to evaluate_model
num_test_samples = len(test_dl.dataset)

# One (num_test_samples, latent_dim) latent Parameter and one Adam optimizer per
# trainer. The count is taken from `trainers` itself rather than a hard-coded 12,
# so changing the grid cannot desynchronise the lists.
latents_list = [
    torch.nn.Parameter(torch.randn(num_test_samples, trainer.latent_dim).to(device))
    for trainer in trainers
]
optimizers = [optim.Adam([latents], lr=1e-3) for latents in latents_list]

csv_file_path = 'fine_tune_results.csv'

# Start a fresh file containing only the header row.
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    header = ['Index'] + [f'Epoch {i+1} Loss' for i in range(num_epochs)] + ['Final Test Loss']
    writer.writerow(header)

for index, trainer in enumerate(trainers):
    start_time = time.time()
    # NOTE(review): the row below assumes train() returns a list with one loss
    # per epoch -- confirm against AD_Trainer.
    train_loss = trainer.train(num_epochs=num_epochs)
    end_time = time.time()

    elapsed_time = end_time - start_time
    print(f"Trainer {index} has finished training in {elapsed_time:.2f} seconds.")

    start_time = time.time()
    test_loss = evaluate_model(
        model=auto_decoders[index],
        test_dl=test_dl,
        opt=optimizers[index],
        latents=latents_list[index],
        epochs=eval_epochs,
        device=device,
    )
    end_time = time.time()

    elapsed_time = end_time - start_time
    print(f"AD {index} has finished test evaluation in {elapsed_time:.2f} seconds.")

    row = [index] + train_loss + [test_loss]

    # Append after every run so finished results survive an interrupted sweep.
    with open(csv_file_path, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(row)

print(f"Results saved to {csv_file_path}.")

## Fine-tune LeakyReLU


In [None]:
def _leaky_decoder(negative_slope):
    """Build the 128-latent BatchNorm decoder with a given LeakyReLU slope.

    Args:
        negative_slope: slope used by the LeakyReLU after each BatchNorm layer.
            The activation after the first Linear layer stays a plain ReLU.

    Returns:
        An nn.Sequential mapping (batch, 128) to (batch, 1, 28, 28).
    """
    def upsample(in_channels, out_channels):
        # Doubles the spatial size, then normalises and applies the leaky activation.
        return [
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(negative_slope),
        ]

    return nn.Sequential(
        nn.Linear(128, 7 * 7 * 256),
        nn.ReLU(),
        nn.Unflatten(1, (256, 7, 7)),
        *upsample(256, 128),
        *upsample(128, 64),
        nn.ConvTranspose2d(64, 1, kernel_size=3, stride=1, padding=1),
    )


# One decoder per LeakyReLU slope under test.
archs = [_leaky_decoder(slope) for slope in (0.05, 0.1, 0.15, 0.2, 0.25)]

# Wrap each LeakyReLU variant in an AutoDecoder and give it its own trainer.
auto_decoders = list(map(AutoDecoder.AutoDecoder, archs))
trainers = [
    AD_Trainer.AD_Trainer(
        decoder=auto_decoder,
        dataloader=train_dl,
        latent_dim=128,
        device=device,
        lr=5e-3,
    )
    for auto_decoder in auto_decoders
]

In [None]:
# Train each LeakyReLU variant, run evaluate_model on the test loader, and log
# the per-epoch training losses plus the final test loss to CSV.
num_test_samples = len(test_dl.dataset)

# One (num_test_samples, latent_dim) latent Parameter per decoder, each with
# its own Adam optimizer.
latents_list = []
for position in range(len(auto_decoders)):
    initial_latents = torch.randn(num_test_samples, trainers[position].latent_dim).to(device)
    latents_list.append(torch.nn.Parameter(initial_latents))
optimizers = [optim.Adam([test_latents], lr=1e-3) for test_latents in latents_list]

csv_file_path = 'fine_tune_leaky_results.csv'

# Start a fresh file containing only the header row.
header = ['Index'] + [f'Epoch {i+1} Loss' for i in range(200)] + ['Final Test Loss']
with open(csv_file_path, mode='w', newline='') as results_file:
    csv.writer(results_file).writerow(header)

for index, trainer in enumerate(trainers):
    train_started = time.time()
    train_loss = trainer.train(num_epochs=200)
    elapsed_time = time.time() - train_started
    print(f"Trainer {index} has finished training in {elapsed_time:.2f} seconds.")

    eval_started = time.time()
    test_loss = evaluate_model(
        model=auto_decoders[index],
        test_dl=test_dl,
        opt=optimizers[index],
        latents=latents_list[index],
        epochs=100,
        device=device,
    )
    elapsed_time = time.time() - eval_started
    print(f"AD {index} has finished test evaluation in {elapsed_time:.2f} seconds.")

    row = [index] + train_loss + [test_loss]

    # Append after every model so finished results survive an interrupted run.
    with open(csv_file_path, mode='a', newline='') as results_file:
        csv.writer(results_file).writerow(row)

print(f"Results saved to {csv_file_path}.")

## Fine tune dropout

In [None]:
def _dropout_decoder(dropout_p):
    """Build the 128-latent BatchNorm + ReLU decoder with a given dropout rate.

    Args:
        dropout_p: probability used by the Dropout layer placed after each
            of the two post-BatchNorm ReLU activations.

    Returns:
        An nn.Sequential mapping (batch, 128) to (batch, 1, 28, 28).
    """
    def upsample(in_channels, out_channels):
        # Doubles the spatial size, then BatchNorm -> ReLU -> Dropout.
        return [
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Dropout(dropout_p),
        ]

    return nn.Sequential(
        nn.Linear(128, 7 * 7 * 256),
        nn.ReLU(),
        nn.Unflatten(1, (256, 7, 7)),
        *upsample(256, 128),
        *upsample(128, 64),
        nn.ConvTranspose2d(64, 1, kernel_size=3, stride=1, padding=1),
    )


# One decoder per dropout probability under test.
archs = [_dropout_decoder(p) for p in (0.1, 0.2, 0.3, 0.4, 0.5)]

# Wrap each dropout variant in an AutoDecoder and give it its own trainer.
auto_decoders = list(map(AutoDecoder.AutoDecoder, archs))
trainers = [
    AD_Trainer.AD_Trainer(
        decoder=auto_decoder,
        dataloader=train_dl,
        latent_dim=128,
        device=device,
        lr=5e-3,
    )
    for auto_decoder in auto_decoders
]

In [None]:
# Train each dropout variant, run evaluate_model on the test loader, and log
# the per-epoch training losses plus the final test loss to CSV.
num_test_samples = len(test_dl.dataset)

# One (num_test_samples, latent_dim) latent Parameter per decoder, each with
# its own Adam optimizer.
latents_list = []
for position in range(len(auto_decoders)):
    initial_latents = torch.randn(num_test_samples, trainers[position].latent_dim).to(device)
    latents_list.append(torch.nn.Parameter(initial_latents))
optimizers = [optim.Adam([test_latents], lr=1e-3) for test_latents in latents_list]

csv_file_path = 'fine_tune_dropout_results.csv'

# Start a fresh file containing only the header row.
header = ['Index'] + [f'Epoch {i+1} Loss' for i in range(200)] + ['Final Test Loss']
with open(csv_file_path, mode='w', newline='') as results_file:
    csv.writer(results_file).writerow(header)

for index, trainer in enumerate(trainers):
    # Time the training run.
    train_started = time.time()
    train_loss = trainer.train(num_epochs=200)
    elapsed_time = time.time() - train_started
    print(f"Trainer {index} has finished training in {elapsed_time:.2f} seconds.")

    # Time the test-set evaluation.
    eval_started = time.time()
    test_loss = evaluate_model(
        model=auto_decoders[index],
        test_dl=test_dl,
        opt=optimizers[index],
        latents=latents_list[index],
        epochs=100,
        device=device,
    )
    elapsed_time = time.time() - eval_started
    print(f"AD {index} has finished test evaluation in {elapsed_time:.2f} seconds.")

    # Row layout matches the header: index, per-epoch losses, final test loss.
    row = [index] + train_loss + [test_loss]

    # Append after every model so finished results survive an interrupted run.
    with open(csv_file_path, mode='a', newline='') as results_file:
        csv.writer(results_file).writerow(row)

print(f"Results saved to {csv_file_path}.")

## Fine Tune Both

In [None]:
def _leaky_dropout_decoder(negative_slope, dropout_p):
    """Build the 128-latent BatchNorm decoder with LeakyReLU and Dropout.

    Args:
        negative_slope: slope of the LeakyReLU after each BatchNorm layer.
            The activation after the first Linear layer stays a plain ReLU.
        dropout_p: probability of the Dropout layer after each LeakyReLU.

    Returns:
        An nn.Sequential mapping (batch, 128) to (batch, 1, 28, 28).
    """
    def upsample(in_channels, out_channels):
        # Doubles the spatial size, then BatchNorm -> LeakyReLU -> Dropout.
        return [
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(negative_slope),
            nn.Dropout(dropout_p),
        ]

    return nn.Sequential(
        nn.Linear(128, 7 * 7 * 256),
        nn.ReLU(),
        nn.Unflatten(1, (256, 7, 7)),
        *upsample(256, 128),
        *upsample(128, 64),
        nn.ConvTranspose2d(64, 1, kernel_size=3, stride=1, padding=1),
    )


# Full 5 x 5 grid, slope-major: index i uses slope number i // 5 and
# dropout rate number i % 5 from the tuples below.
archs = [
    _leaky_dropout_decoder(negative_slope, dropout_p)
    for negative_slope in (0.05, 0.1, 0.15, 0.2, 0.25)
    for dropout_p in (0.1, 0.2, 0.3, 0.4, 0.5)
]

# Wrap each grid entry in an AutoDecoder and give it its own trainer.
auto_decoders = list(map(AutoDecoder.AutoDecoder, archs))
trainers = [
    AD_Trainer.AD_Trainer(
        decoder=auto_decoder,
        dataloader=train_dl,
        latent_dim=128,
        device=device,
        lr=5e-3,
    )
    for auto_decoder in auto_decoders
]

In [None]:
# Train each LeakyReLU x Dropout variant, run evaluate_model on the test loader,
# and log the per-epoch training losses plus the final test loss to CSV.
num_test_samples = len(test_dl.dataset)

# One (num_test_samples, latent_dim) latent Parameter per decoder, each with
# its own Adam optimizer.
latents_list = []
for position in range(len(auto_decoders)):
    initial_latents = torch.randn(num_test_samples, trainers[position].latent_dim).to(device)
    latents_list.append(torch.nn.Parameter(initial_latents))
optimizers = [optim.Adam([test_latents], lr=1e-3) for test_latents in latents_list]

csv_file_path = 'fine_tune_both_results.csv'

# Start a fresh file containing only the header row.
header = ['Index'] + [f'Epoch {i+1} Loss' for i in range(200)] + ['Final Test Loss']
with open(csv_file_path, mode='w', newline='') as results_file:
    csv.writer(results_file).writerow(header)

for index, trainer in enumerate(trainers):
    # Time the training run.
    train_started = time.time()
    train_loss = trainer.train(num_epochs=200)
    elapsed_time = time.time() - train_started
    print(f"Trainer {index} has finished training in {elapsed_time:.2f} seconds.")

    # Time the test-set evaluation.
    eval_started = time.time()
    test_loss = evaluate_model(
        model=auto_decoders[index],
        test_dl=test_dl,
        opt=optimizers[index],
        latents=latents_list[index],
        epochs=100,
        device=device,
    )
    elapsed_time = time.time() - eval_started
    print(f"AD {index} has finished test evaluation in {elapsed_time:.2f} seconds.")

    # Row layout matches the header: index, per-epoch losses, final test loss.
    row = [index] + train_loss + [test_loss]

    # Append after every model so finished results survive an interrupted run.
    with open(csv_file_path, mode='a', newline='') as results_file:
        csv.writer(results_file).writerow(row)

print(f"Results saved to {csv_file_path}.")

## Best Architecture

In [None]:
# Selected architecture: 128-dim latent -> 256x7x7 feature map, then two
# ConvTranspose2d + BatchNorm + ReLU stages (7x7 -> 14x14 -> 28x28) and a
# final size-preserving ConvTranspose2d down to a single channel.
best_layers = [
    nn.Linear(128, 7 * 7 * 256),
    nn.ReLU(),
    nn.Unflatten(1, (256, 7, 7)),
]
for in_channels, out_channels in ((256, 128), (128, 64)):
    best_layers += [
        nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1, output_padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
    ]
best_layers.append(nn.ConvTranspose2d(64, 1, kernel_size=3, stride=1, padding=1))
arch = nn.Sequential(*best_layers)
# Wrap the selected network and build its single trainer; `trainers` stays a
# list so the training loop below keeps the same shape as the earlier sweeps.
decoder = AutoDecoder.AutoDecoder(arch)
best_trainer = AD_Trainer.AD_Trainer(
    decoder=decoder,
    dataloader=train_dl,
    latent_dim=128,
    device=device,
    lr=5e-3,
)
trainers = [best_trainer]

In [None]:
# Train the selected architecture for 500 epochs, then run evaluate_model on
# both the train and the test loader and log everything to CSV.
num_epochs = 500   # training epochs; also the number of per-epoch CSV columns
eval_epochs = 500  # epochs handed to evaluate_model
num_train_samples = len(train_dl.dataset)
num_test_samples = len(test_dl.dataset)

# Models to evaluate, parallel to `trainers`. The previous code read
# `auto_decoders[index]`, which at this point still holds the models from the
# "Fine Tune Both" sweep rather than the decoder trained in this section.
best_decoders = [decoder]

# Separate latents and optimizers for the train-set and test-set evaluations,
# each sized to the dataset it is used with. The previous code reused one
# test-sized tensor (and its Adam state) for both evaluations.
# NOTE(review): presumably evaluate_model needs one latent row per sample of
# the loader it is given -- confirm against evaluate.py.
train_latents_list = [
    torch.nn.Parameter(torch.randn(num_train_samples, trainer.latent_dim).to(device))
    for trainer in trainers
]
train_optimizers = [optim.Adam([latents], lr=1e-3) for latents in train_latents_list]
latents_list = [
    torch.nn.Parameter(torch.randn(num_test_samples, trainer.latent_dim).to(device))
    for trainer in trainers
]
optimizers = [optim.Adam([latents], lr=1e-3) for latents in latents_list]

csv_file_path = 'best_results.csv'

# Start a fresh file containing only the header row.
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    header = ['Index'] + [f'Epoch {i+1} Loss' for i in range(num_epochs)] +  ['Final Train Loss'] + ['Final Test Loss']
    writer.writerow(header)

for index, trainer in enumerate(trainers):
    model = best_decoders[index]

    start_time = time.time()
    # NOTE(review): the row below assumes train() returns a list with one loss
    # per epoch -- confirm against AD_Trainer.
    train_loss = trainer.train(num_epochs=num_epochs)
    end_time = time.time()

    elapsed_time = end_time - start_time
    print(f"Trainer {index} has finished training in {elapsed_time:.2f} seconds.")

    start_time = time.time()
    train_loss_evaluate = evaluate_model(
        model=model,
        test_dl=train_dl,
        opt=train_optimizers[index],
        latents=train_latents_list[index],
        epochs=eval_epochs,
        device=device,
    )
    test_loss = evaluate_model(
        model=model,
        test_dl=test_dl,
        opt=optimizers[index],
        latents=latents_list[index],
        epochs=eval_epochs,
        device=device,
    )
    end_time = time.time()

    elapsed_time = end_time - start_time
    print(f"AD {index} has finished test evaluation in {elapsed_time:.2f} seconds.")

    # Row layout matches the header: index, per-epoch losses, train loss, test loss.
    row = [index] + train_loss + [train_loss_evaluate] +  [test_loss]

    # Append after every model so finished results survive an interrupted run.
    with open(csv_file_path, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(row)

print(f"Results saved to {csv_file_path}.")

In [None]:
# `trainer` is the loop variable left over from the `for ... in enumerate(trainers)`
# loop in the previous cell, i.e. the trainer of the selected architecture.
# NOTE(review): that trainer has already been through train(num_epochs=500) in
# the previous cell; whether this call resumes or restarts training depends on
# AD_Trainer -- confirm the extra pass is intended.
trainer.train(num_epochs=500)

# Fresh test-set latents and their optimizer for the final evaluation below.
num_test_samples = len(test_dl.dataset)
initial_latents = torch.randn(num_test_samples, trainer.latent_dim).to(device)
latents = torch.nn.Parameter(initial_latents)
opt = optim.Adam([latents], lr=1e-3)

In [None]:
# Final evaluation of the selected decoder on the test loader, using the
# latents and optimizer created in the previous cell.
test_loss = evaluate_model(
    model=decoder,
    test_dl=test_dl,
    opt=opt,
    latents=latents,
    epochs=1000,
    device=device,
)
print(test_loss)

## Sample specific vectors

In [None]:
# Decode five fitted test latents and five random latents, and save both image grids.
# Seed both RNGs: `random` drives the choice of test latents, torch drives the
# random latents. Seeding only `random` left the second image grid unreproducible.
random.seed(6)
torch.manual_seed(6)
sampled_indices = random.sample(range(len(latents)), 5)

# Pure inference: no autograd graph is needed for the decoded images.
# NOTE(review): the decoder contains BatchNorm2d layers; whether it is in train
# or eval mode here depends on AD_Trainer / evaluate_model -- confirm the mode
# is the intended one for generating images.
with torch.no_grad():
    # Indexing with the list of row indices gathers the same rows that stacking
    # them one by one would.
    sampled_latents_tensor = latents[sampled_indices]
    random_latents_tensor = torch.randn_like(sampled_latents_tensor)

    sampled_test_images = decoder(sampled_latents_tensor).view(-1, 1, 28, 28)
    random_test_images = decoder(random_latents_tensor).view(-1, 1, 28, 28)

utils.save_images(sampled_test_images, "sampled_test_images.png")
utils.save_images(random_test_images, "random_test_images.png")