install the necessary packages and setting up the environment

In [16]:
# Install the necessary packages
!pip install numpy --pre torch torchvision torchaudio --force-reinstall --index-url https://download.pytorch.org/whl/nightly/cu117
!pip install numpy matplotlib

# Import the packages
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt


Looking in indexes: https://download.pytorch.org/whl/nightly/cu117
[0m[31mERROR: Could not find a version that satisfies the requirement numpy (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for numpy[0m[31m
[0m

Load the shoe images

In [17]:
# Define a transform to preprocess the images
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the shoe images from the specified path
shoe_dataset = torchvision.datasets.ImageFolder(root='/kaggle/input/sneaker-image-dataset/', transform=transform)

# Create a data loader to load the images in batches
dataloader = torch.utils.data.DataLoader(shoe_dataset, batch_size=64, shuffle=True)


Define the VAE model architecture

In [18]:
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        
        # The encoder part of the VAE
        self.fc1 = nn.Linear(1024 * 1024 * 3, 400) # Flattens the image and maps it to a 400 dimensional vector
        self.fc21 = nn.Linear(400, 20) # Maps the 400 dimensional vector to a 20 dimensional mean vector
        self.fc22 = nn.Linear(400, 20) # Maps the 400 dimensional vector to a 20 dimensional log-variance vector
        
        # The decoder part of the VAE
        self.fc3 = nn.Linear(20, 400) # Maps the 20 dimensional vector to a 400 dimensional vector
        self.fc4 = nn.Linear(400, 1024 * 1024 * 3) # Unflattens the 400 dimensional vector back to an image
    
    def encode(self, x):
        # Encode the image
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)
    
    def reparameterize(self, mu, logvar):
        # Sample from the latent space
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std
    
    def decode(self, z):
        # Decode the latent vector back to an image
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        # Forward pass through the VAE
        mu, logvar = self.encode(x.view(-1, 1024 * 1024 * 3))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

# Initialize the VAE model
model = VAE().to('cuda') # Move the model to the GPU for faster training

AssertionError: Torch not compiled with CUDA enabled

Define the loss function for the VAE

In [None]:
def loss_function(recon_x, x, mu, logvar):
    # Binary cross-entropy loss
    BCE = F.binary_cross_entropy(recon_x, x.view(-1, 1024 * 1024 * 3), reduction='sum')
    
    # Kullback-Leibler divergence loss
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    
    # Return the total loss
    return BCE + KLD

Train the VAE model

In [None]:
# Define the optimizer and the number of epochs
optimizer = optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 100

# Start training the model
for epoch in range(num_epochs):
    train_loss = 0
    
    # Loop through the shoe images in each batch
    for i, data in enumerate(dataloader):
        optimizer.zero_grad()
        
        # Get the input images and move them to the GPU
        inputs, _ = data
        inputs = inputs.to('cuda')
        
        # Forward pass through the VAE
        recon_inputs, mu, logvar = model(inputs)
        
        # Compute the loss
        loss = loss_function(recon_inputs, inputs, mu, logvar)
        
        # Backpropagate the error and update the parameters
        loss.backward()
        optimizer.step()
        
        # Accumulate the loss for each batch
        train_loss += loss.item()
    
    # Print the average loss for each epoch
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, train_loss/len(dataloader.dataset)))


Generate sample images from the trained model!

In [None]:
# Define the number of sample images to generate
num_samples = 25

# Generate sample images
with torch.no_grad():
    sample_images = model.decode(torch.randn(num_samples, latent_size)).clamp(0, 1)

# Plot the generated images
plt.figure(figsize=(10, 10))
for i in range(num_samples):
    plt.subplot(5, 5, i + 1)
    plt.imshow(np.transpose(sample_images[i].numpy(), (1, 2, 0)))
    plt.axis('off')

plt.show()