In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms # Image Transforms and Augmentations
from torchvision import datasets

from torch.utils.data import Dataset


Import the MNIST dataset from pytorch. This segment isn't mine, using it to handle the code.

In [23]:
torchvision.datasets.MNIST('./',download = True) # Downloads the MNIST dataset from torchvision into the current directory

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./
    Split: Train

In [24]:
class MNISTDataset(Dataset):
    """
    MAP-style. __getitem__ and __len__ methods need to be implemented.
    """
    def __init__(self, path, train): # train is a boolean
        Transform = transforms.ToTensor()
        data = datasets.MNIST(root=path, train = train, download = False, transform = Transform)
        self.images = [None] * len(data) #Holds the image pixel array
        self.labels = [None] * len(data) #Holds the number label
        for i in range(len(data)):
            self.images[i] = data[i][0]
            self.labels[i] = data[i][1]

    def __getitem__(self, index):
        x = self.images[index].float()
        x = torch.Tensor(x)
        x = torch.flatten(x) # flatten the image from a 28 x 28 to a 784 1-dimensional tensor
        return x

    # returns the length of the dataset
    def __len__(self):
        return len(self.labels)

In [25]:
# Formally make the datasets
train_dataset = MNISTDataset('./', True)
test_dataset = MNISTDataset('./', False)

print("Train dataset length: ", len(train_dataset))
print("Test dataset length: ", len(test_dataset))

Train dataset length:  60000
Test dataset length:  10000


Train the Autoencoder Model

In [26]:
class AutoEncoder(nn.Module):
    def __init__(self, input_shape, latent_dim = 128):
        super().__init__()

        # 2 layers for the encoder
        self.encoder_l1 = nn.Linear(in_features=input_shape, out_features=latent_dim)
        self.encoder_l2 = nn.Linear(in_features=latent_dim, out_features=latent_dim)

        # decoder
        self.decoder_l1 = nn.Linear(in_features=latent_dim, out_features=latent_dim)
        self.decoder_l2 = nn.Linear(in_features=latent_dim, out_features=input_shape)
    
    def forward(self, x):
        '''
        x is the input data
        returns the reconstruction
        '''
        latent = self.run_encoder(x)
        x_hat = self.run_decoder(latent)
        return x_hat

    def run_encoder(self, x):
        output = F.relu(self.encoder_l1(x)) # relu adds non linearity
        latent = F.relu(self.encoder_l2(output)) # results in the latent vector
        return latent

    def run_decoder(self, latent):
        output = F.relu(self.decoder_l1(latent))
        x_hat = F.relu(self.decoder_l2(output))
        return x_hat

In [27]:
# Gets the shape of an image from the dataset. '.shape' has a [0] to get the integer from the tensor object
image_shape = train_dataset[0].shape[0]

# Initialize the model
model = AutoEncoder(input_shape = image_shape)

In [28]:
model.train() # Set to train() mode

AutoEncoder(
  (encoder_l1): Linear(in_features=784, out_features=128, bias=True)
  (encoder_l2): Linear(in_features=128, out_features=128, bias=True)
  (decoder_l1): Linear(in_features=128, out_features=128, bias=True)
  (decoder_l2): Linear(in_features=128, out_features=784, bias=True)
)

In [29]:
train_loader = torch.utils.data.DataLoader(train_dataset,
                                            batch_size=128, #number of training samples to load in at a time
                                            shuffle=True) # Shuffle the data beforehand
mse = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
n_epochs = 100

for epoch in range(n_epochs):
    loss = 0
    #Batch_features is each training batch. Here it's of the shape (128, 784).
    for batch_features in train_loader:
        batch_features = batch_features.cuda()

        optimizer.zero_grad()

        # compute reconstructions
        outputs = model(batch_features)

        # compute the reconstruction loss of the training data
        train_loss = mse(outputs, batch_features)

        #compute accumulated gradients
        train_loss.backward()

AssertionError: Torch not compiled with CUDA enabled