In [None]:
# CAP 5415 Programming Assignment 03: Autoencoder

"""
Due Date: 1ONov2023
Author: Lam Nguyen

Subject: Autoencoder [2.5 pts]

Overview:

Implement autoencoder using MNIST dataset. The input size of the images will be 28x28 with a single channel. You will implement two different variations, one with fully connected layers and the other convolutional neural network.

Tasks:

1. Implement an autoencoder using fully connected layers. 
    a. The encoder will have 2 layers (with 256, and 128 neurons)
    b. The decoder will have 2 layers (with 256 and 784 neurons)
    c. Train this network using MSE loss for 10 epochs
    d. Compare the number of parameters  in the encoder and decoder.
    e. Create a writeup:
        i. Show 20 reconstructed images from testing data (2 image for each class)
        ii. Show original images

2. Implement an autoencoder using Convolutional layers. 
    a. The encoder will have 2 convolutional layers and 2 max pooling layers
        i. Use kernel size 3x3
        ii. reLU activation
        iii. padding of 1 to preserve the feature map.
    b. The decoder will have 3 convolutional layers
        i. kernel shape is 3x3
        ii. padding = 1
        iii. The first 3 convolutional layers will be followed by an upsampling layer.
                a. This upsampling layer will double the resolution of the feature maps using linear interpolation
    c. Train the network for 10 epochs
    d. Compare the number of parameters in the encoder and decoder.
    e. Compare the total parameters in this autoencoder with the previous autoencoder.
    f. Create Writeup:
        i. Show 20 sample reconstructed images from testing data (2 images for each class)
        ii. show original images
        iii. Compare the reconstructed results with the previous autoencoder

Note that you can choose any optimizer. Just use the same for both variations
        

Sources:

Autoencoder in Pytorch-Theory and Implementation by Patrick Loeber: https://www.youtube.com/watch?v=zp8clK9yCro

How to get info on model parameters using Torchinfo: https://pypi.org/project/torchinfo/

How to save trained model: https://wandb.ai/wandb/common-ml-errors/reports/How-to-Save-and-Load-Models-in-PyTorch--VmlldzozMjg0MTE

How to take subsets of dataset: https://discuss.pytorch.org/t/how-to-get-a-part-of-datasets/82161



"""

In [None]:
# ========================================================================================#
# 1. Load Modules
# ========================================================================================#

import torch
from pathlib import Path
from PIL import Image
import os
import numpy as np
from torchvision.utils import save_image
import torch.optim as optim # Optimization algorithms
import torch.nn as nn # All the Neural network models, loss functions
import torch.nn.functional as F # All functions without parameters
from torch.utils.data import DataLoader # Easier dataset management such as minibatches
import torchvision.datasets as datasets # Standard datasets that can be used as test training data
import torchvision.transforms as transforms # Transformations that can be performed on the dataset
import torchvision.utils
from torchinfo import summary # provides a summary of the model architecture and it's parameters
import logging
import matplotlib.pyplot as plt

# Import some packages for logging training and showing progress
from tqdm_loggable.auto import tqdm



# Set up some basic logging to record traces of training
logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        filename="Autoencoder_Documents/Autoencoder_Parameter_Summary.txt" # Save log to a file
    )


# Hyperparameters
input_size = 28*28
hidden_size = 100
num_classes= 10
learning_rate = 1e-3
batch_size = 64
num_epochs = 10
weight_decay = 1e-5
    


# Load GPU Parameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# =======================================================#
# 2. Import Data:
# =======================================================#

train_dataset = datasets.MNIST(root='MNIST_dataset/', 
               train=True, 
               transform=transforms.ToTensor(),
               download=True
               )#Transforms transforms numpy array to tensors so that pytorch can use the data


train_loader = DataLoader(
    dataset = train_dataset,
    batch_size = batch_size,
    shuffle = True
)


In [None]:
# =======================================================#
# 3. Create Fully Connected Autoencoder:
# =======================================================#

class FCC_Autoencoder(nn.Module):

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(in_features=28*28, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256,out_features=128),


        )

        self.decoder = nn.Sequential(
            nn.Linear(128,256),
            nn.ReLU(),
            nn.Linear(256,28*28),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    
# Note: If images are in the range (-1,1) apply Tanh() activation instead of sigmoid


# Input [-1, +1] -> use nn.Tanh

In [None]:
# ========================================================================================#
# 3. Import the Fully Connected Autoencoder Model and train
# ========================================================================================#

model = FCC_Autoencoder().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)


outputs = []
for epoch in range(num_epochs):
    for (img, _) in tqdm(train_loader):
        img = img.to(device)
        img = img.reshape(-1, 28*28) # -> use for Autoencoder_Linear
        recon = model(img)
        loss = criterion(recon, img)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch:{epoch+1}, Loss:{loss.item():.4f}')
    outputs.append((epoch, img, recon))

# =======================================================#
# 6. Save the trained Fully Connected Autoencoder
# =======================================================#

print("Saving the Fully Connected Autoencoder Model to the folder: Assignment_03/01_Autoencoder/Trained_Autoencoders")
torch.save(model.state_dict(),'Trained_Autoencoders/FCC_Autoencoder_Model.pth')


In [None]:
# =======================================================#
# 5. Get the number of parameters for the FC-Autoencoder:
# =======================================================#

# Prints out the architecture of the trained model
FCC_Autoencoder_summary = summary(model)
print(FCC_Autoencoder_summary)

logging.info(FCC_Autoencoder_summary) # Saves the parameter data file into the folder Autoencoder_Documents


In [None]:

for k in range(0, num_epochs, 4):
    plt.figure(figsize=(9, 2))
    plt.gray()
    imgs = outputs[k][1].cpu().detach().numpy()
    recon = outputs[k][2].cpu().detach().numpy()
    for i, item in enumerate(imgs):
        if i >= 9: break
        plt.subplot(2, 9, i+1)
        item = item.reshape(-1, 28,28) # -> use for Autoencoder_Linear
        # item: 1, 28, 28
        plt.imshow(item[0])
            
    for i, item in enumerate(recon):
        if i >= 9: break
        plt.subplot(2, 9, 9+i+1) # row_length + i + 1
        item = item.reshape(-1, 28,28) # -> use for Autoencoder_Linear
        # item: 1, 28, 28
        plt.imshow(item[0])

In [None]:
# =======================================================#
# 6. Save the trained Fully Connected Autoencoder
# =======================================================#

print("Saving the Fully Connected Autoencoder Model to the folder: Assignment_03/01_Autoencoder/Trained_Autoencoders")
torch.save(model.state_dict(),'Trained_Autoencoders/FCC_Autoencoder_Model.pth')


In [None]:
class CNN_Autoencoder(nn.Module):
    def __init__(self):
        #N,1,28,28
        super(CNN_Autoencoder,self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, stride=3, padding=1), # N,16,10,10
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2),  # N,16,5,5
            nn.Conv2d(16, 8, 3, stride=2, padding=1), # N,8,3,3
            nn.ReLU(),
            nn.MaxPool2d(2, stride=1) # N,8,2,2
        )
        self.decoder = nn.Sequential(
        nn.ConvTranspose2d(8, 16, 3, stride=2, padding=1), # N,16,3,3
        nn.ReLU(),
        nn.Upsample(scale_factor=2), # N, 16, 6, 6
        nn.ConvTranspose2d(16, 8, 3, stride=1, padding=1), # N,8,6,6
        nn.ReLU(),
        nn.Upsample(scale_factor=2), # N,8,12,12
        nn.ConvTranspose2d(8, 1, 3, stride=1, padding=1,dilation=2), # N,1,14,14
        nn.Tanh(),
        nn.Upsample(scale_factor=2) # N,1,28,28
    )
    def forward(self,x):
        x = self.encoder(x)
        print(x.size())
        x = self.decoder(x)
        print(x.size())
        return x

    


In [None]:
# ========================================================================================#
# 2. Import the Convolutional Autoencoder Model and train
# ========================================================================================#

model = CNN_Autoencoder().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)


outputs = []
for epoch in range(num_epochs):
    for (img, _) in tqdm(train_loader):

        img = img.to(device)
        # img = img.reshape(-1, 28*28) # -> use for Autoencoder_Linear
        recon = model(img)
        loss = criterion(recon, img)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(recon.size())

    print(f'Epoch:{epoch+1}, Loss:{loss.item():.4f}')
    outputs.append((epoch, img, recon))

# =======================================================#
# Save the trained Convolutional Autoencoder
# =======================================================#


print("Saving the Convolutional Autoencoder Model to the folder: Assignment_03/01_Autoencoder/Trained_Autoencoders")
torch.save(model.state_dict(),'Trained_Autoencoders/CNN_Autoencoder_Model.pth')


In [None]:
# =======================================================#
# 5. Get the number of parameters for the CNN-Autoencoder:
# =======================================================#

# Prints out the architecture of the trained model
CNN_Autoencoder_summary = summary(model)
print(CNN_Autoencoder_summary)

logging.info(CNN_Autoencoder_summary) # Saves the parameter data file into the folder Autoencoder_Documents

In [None]:


for k in range(0, num_epochs, 4):
    plt.figure(figsize=(9, 2))
    plt.gray()
    imgs = outputs[k][1].cpu().detach().numpy()
    recon = outputs[k][2].cpu().detach().numpy()
    for i, item in enumerate(imgs):
        if i >= 9: break
        plt.subplot(2, 9, i+1)
        # item = item.reshape(-1, 28,28) # -> use for Autoencoder_Linear
        # item: 1, 28, 28
        plt.imshow(item[0])
            
    for i, item in enumerate(recon):
        if i >= 9: break
        plt.subplot(2, 9, 9+i+1) # row_length + i + 1
        # item = item.reshape(-1, 28,28) # -> use for Autoencoder_Linear
        # item: 1, 28, 28
        plt.imshow(item[0])

In [181]:
# Input 20 images into CNN Autoencoder

input_images_array= Path("Test_Images").glob('*.png')

model = CNN_Autoencoder().to(device)

model.load_state_dict(torch.load('Trained_Autoencoders/CNN_Autoencoder_Model.pth'))

model.eval()
counter=0
for image in input_images_array:
    counter+=1
    im = Image.open(image).convert("L")
    im = np.asarray(im)
    convert_tensor = transforms.ToTensor()
    tensor_img = convert_tensor(im).to(device)

    with torch.no_grad():
        processed_img = model(tensor_img)
        print(processed_img.size())
        save_image(processed_img, f'Processed_CNN_Images/processed_{counter}.jpg')


# Why the hell are they coming out the wrong size?  
    




torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])
torch.Size([1, 5, 28])


In [182]:
# Input 20 images into FCC Autoencoder

input_images_array= Path("Test_Images").glob('*.png')

model = FCC_Autoencoder().to(device)

model.load_state_dict(torch.load('Trained_Autoencoders/FCC_Autoencoder_Model.pth'))

model.eval()
counter=0
for image in input_images_array:
    counter+=1
    im = Image.open(image).convert("L")
    im = np.asarray(im)
    convert_tensor = transforms.ToTensor()
    im = convert_tensor(im)
    im = im.to(device)
    im = im.reshape(-1,28*28)

    with torch.no_grad():
        processed_img = model(im)
        processed_img = processed_img.reshape(-1,28,28)
        print(processed_img.size())
        save_image(processed_img, f'Processed_FCC_Images/processed_{counter}.jpg')


    



torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
torch.Size([1, 28, 28])
