In [None]:
import nest_asyncio
nest_asyncio.apply()

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from skimage.util import random_noise
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings(action = 'ignore')

# PCA

In [None]:
# Load the flattened images; each CSV row is later reshaped to a 28x28 picture
image_data = pd.read_csv("image_data.csv")

# Per-pixel average across all rows (a Series indexed by column name)
mu = image_data.mean(axis = 0)

# Center the data by removing the mean image from every row
images_centered = image_data.sub(mu, axis = "columns")

# Mean image as a 28x28 array for display
mu_image = mu.to_numpy().reshape(28, 28)

# Show the mean image
plt.imshow(mu_image)
plt.show()

In [None]:
# Show the first 3 mean-centered images, one figure each
n_preview = 3
for row_idx in range(n_preview):
    centered_pixels = images_centered.iloc[row_idx].to_numpy()
    plt.imshow(centered_pixels.reshape(28, 28))
    plt.show()

In [None]:
# Covariance matrix of the pixels (rowvar = False: columns are the variables)
cov_matrix = np.cov(images_centered, rowvar = False)

# A covariance matrix is symmetric, so use the symmetric solver `eigh`.
# It guarantees real eigenvalues and orthonormal eigenvectors, whereas the
# general solver `eig` may return complex values caused by round-off.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

# Order the eigenpairs by decreasing eigenvalue (largest variance first).
# Eigenvectors are stored as columns, so the same permutation is applied to columns.
index = np.argsort(eigenvalues)[::-1]
eigenvalues_sort = eigenvalues[index]
eigenvectors_sort = eigenvectors[:, index]

# Plot the 3 leading eigenvectors as 28x28 images
for i in range(3):
    eigenvector = eigenvectors_sort[:, i].reshape(28, 28)
    plt.imshow(eigenvector)
    plt.show()

In [None]:
# Keep the 25 leading eigenvectors (columns) and rescale each one to unit length
eigenvector25 = eigenvectors_sort[:, :25]
column_norms = np.linalg.norm(eigenvector25, axis = 0)
eigenvector25_norm = eigenvector25 / column_norms

# Find z: coordinates of every centered image in the 25-dimensional basis
z = images_centered.to_numpy() @ eigenvector25_norm

# Print the codes of the first 3 images
for row_idx in range(3):
    print(z[row_idx])

In [None]:
# Flatten the mean image back to a single row so it broadcasts over all images
mu_image_1d = mu_image.reshape(1, -1)

# Reconstruct images: map the codes back to pixel space and add the mean back
reconstructed = z @ eigenvector25_norm.T + mu_image_1d

# First the 3 reconstructions ...
for row_idx in range(3):
    approx_pixels = reconstructed[row_idx]
    plt.imshow(approx_pixels.reshape(28, 28))
    plt.show()

# ... then the 3 matching originals for comparison
for row_idx in range(3):
    original_pixels = image_data.iloc[row_idx].to_numpy()
    plt.imshow(original_pixels.reshape(28, 28))
    plt.show()

# Linear Autoencoder

In [None]:
class Encoder(nn.Module):
    """Linear encoder: one float64 fully connected layer, no activation.

    Args:
        input_dim: Number of features per flattened sample.
        output_dim: Size of the produced code.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc = nn.Linear(in_features = input_dim, out_features = output_dim, dtype = torch.float64)

    def forward(self, x):
        """Flatten everything after the batch axis and apply the linear layer."""
        flat = x.view(x.shape[0], -1)
        return self.fc(flat)

class Decoder(nn.Module):
    """Linear decoder: one float64 fully connected layer, no activation.

    Args:
        input_dim: Size of the incoming code.
        output_dim: Number of features in the reconstruction.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc = nn.Linear(in_features = input_dim, out_features = output_dim, dtype = torch.float64)

    def forward(self, z):
        """Map a batch of codes to reconstructions."""
        return self.fc(z)

# Dimensions: flattened 28x28 image in, 25-dimensional code out
input_dim = 28 * 28
output_dim = 25

# Build the two halves of the autoencoder; the decoder mirrors the encoder's sizes
encoder = Encoder(input_dim = input_dim, output_dim = output_dim)
decoder = Decoder(input_dim = output_dim, output_dim = input_dim)

In [None]:
# Training hyperparameters
lr = 1e-3         # learning rate handed to the optimizer
batch_size = 3    # samples per mini-batch
epochs = 10       # passes over the data loader

class Dataset(torch.utils.data.Dataset):
    """Min-max scaled rows of a CSV file, served one row at a time.

    The base class is spelled out as ``torch.utils.data.Dataset`` on purpose:
    ``class Dataset(Dataset)`` rebinds the imported name, so re-running the cell
    (or any later cell repeating the definition) would inherit from the previous
    user-defined class instead of from PyTorch's ``Dataset``.

    Args:
        csv_file: Path of the CSV file; every column is read as float64.
        transform: Optional callable applied to each row before it is returned.
    """

    def __init__(self, csv_file, transform = None):
        self.transform = transform

        raw = pd.read_csv(csv_file, dtype = "float64")
        # MinMaxScaler rescales each column independently and returns a NumPy array
        self.data = MinMaxScaler().fit_transform(raw)

    def __len__(self):
        """Number of rows in the scaled data."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return row ``idx``, passed through ``transform`` when one was given."""
        sample = self.data[idx]

        if self.transform is not None:
            sample = self.transform(sample)

        return sample

# Source file holding one flattened image per row
csv_file = 'image_data.csv'

# Each row is converted to a tensor when it is fetched
transform = torch.tensor

# Dataset plus a shuffling mini-batch loader
dataset = Dataset(csv_file = csv_file, transform = transform)
data_loader = DataLoader(dataset = dataset, batch_size = batch_size, shuffle = True)

In [None]:
# Mean-squared reconstruction error, minimized with Adam over both networks' parameters
criterion = nn.MSELoss()
trainable_params = [*encoder.parameters(), *decoder.parameters()]
optimizer = optim.Adam(trainable_params, lr = lr)

# Train: the input batch is also the reconstruction target
for epoch in range(epochs):
    running_loss = 0.0
    for inputs in data_loader:
        encoded = encoder(inputs)
        decoded = decoder(encoded)
        loss = criterion(decoded, inputs)

        # Clear stale gradients, backpropagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Report the mean batch loss for this epoch
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(data_loader)}")

In [None]:
def imshow(image):
    """Display a channel-first image tensor with matplotlib.

    Args:
        image: Tensor laid out as (channels, height, width); it is converted to
            NumPy and its axes are reordered to (height, width, channels).
    """
    channels_last = image.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()

# Take the first batch produced by the loader
batch_iter = iter(data_loader)
images = next(batch_iter)

# Run the batch through the autoencoder without tracking gradients
with torch.no_grad():
    encoded_images = encoder(images)
    reconstructed_images = decoder(encoded_images)

# Reconstructed images, arranged in a single grid of 1-channel 28x28 tiles
print('Reconstructed Images:')
reconstruction_grid = torchvision.utils.make_grid(reconstructed_images.view(batch_size, 1, 28, 28))
imshow(reconstruction_grid)

# Non-Linear Autoencoder

In [None]:
class Encoder(nn.Module):
    """Two-layer encoder: input -> 128 hidden units (ReLU) -> code, all float64.

    Args:
        input_dim: Number of features per flattened sample.
        output_dim: Size of the produced code.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features = input_dim, out_features = 128, dtype = torch.float64)
        self.fc2 = nn.Linear(in_features = 128, out_features = output_dim, dtype = torch.float64)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten each sample, apply the hidden layer with ReLU, then the output layer."""
        flat = x.view(x.shape[0], -1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

class Decoder(nn.Module):
    """Two-layer decoder: code -> 128 hidden units -> sigmoid output, all float64.

    NOTE(review): there is no activation between ``fc1`` and ``fc2``, so the two
    layers compose into a single affine map ahead of the sigmoid. Confirm
    whether a ReLU was intended here, as in the encoder.

    Args:
        input_dim: Size of the incoming code.
        output_dim: Number of features in the reconstruction.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features = input_dim, out_features = 128, dtype = torch.float64)
        self.fc2 = nn.Linear(in_features = 128, out_features = output_dim, dtype = torch.float64)
        self.sigmoid = nn.Sigmoid()

    def forward(self, z):
        """Apply both linear layers, then squash the result into (0, 1)."""
        hidden = self.fc1(z)
        logits = self.fc2(hidden)
        return self.sigmoid(logits)

# Dimensions: flattened 28x28 image in, 25-dimensional code out
input_dim = 28 * 28
output_dim = 25

# Build the two halves of the autoencoder; the decoder mirrors the encoder's sizes
encoder = Encoder(input_dim = input_dim, output_dim = output_dim)
decoder = Decoder(input_dim = output_dim, output_dim = input_dim)

In [None]:
# Training hyperparameters
lr = 1e-3         # learning rate handed to the optimizer
batch_size = 3    # samples per mini-batch
epochs = 10       # passes over the data loader

class Dataset(torch.utils.data.Dataset):
    """Min-max scaled rows of a CSV file, served one row at a time.

    The base class is spelled out as ``torch.utils.data.Dataset`` on purpose:
    ``class Dataset(Dataset)`` inherits from whatever ``Dataset`` is currently
    bound to, which after an earlier identical definition is the previous
    user-defined class rather than PyTorch's ``Dataset``.

    Args:
        csv_file: Path of the CSV file; every column is read as float64.
        transform: Optional callable applied to each row before it is returned.
    """

    def __init__(self, csv_file, transform = None):
        self.transform = transform

        raw = pd.read_csv(csv_file, dtype = "float64")
        # MinMaxScaler rescales each column independently and returns a NumPy array
        self.data = MinMaxScaler().fit_transform(raw)

    def __len__(self):
        """Number of rows in the scaled data."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return row ``idx``, passed through ``transform`` when one was given."""
        sample = self.data[idx]

        if self.transform is not None:
            sample = self.transform(sample)

        return sample

# Source file holding one flattened image per row
csv_file = 'image_data.csv'

# Each row is converted to a tensor when it is fetched
transform = torch.tensor

# Dataset plus a shuffling mini-batch loader
dataset = Dataset(csv_file = csv_file, transform = transform)
data_loader = DataLoader(dataset = dataset, batch_size = batch_size, shuffle = True)

In [None]:
# Mean-squared reconstruction error, minimized with Adam over both networks' parameters
criterion = nn.MSELoss()
trainable_params = [*encoder.parameters(), *decoder.parameters()]
optimizer = optim.Adam(trainable_params, lr = lr)

# Train: the input batch is also the reconstruction target
for epoch in range(epochs):
    running_loss = 0.0
    for inputs in data_loader:
        encoded = encoder(inputs)
        decoded = decoder(encoded)
        loss = criterion(decoded, inputs)

        # Clear stale gradients, backpropagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Report the mean batch loss for this epoch
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(data_loader)}")

In [None]:
def imshow(image):
    """Display a channel-first image tensor with matplotlib.

    Args:
        image: Tensor laid out as (channels, height, width); it is converted to
            NumPy and its axes are reordered to (height, width, channels).
    """
    channels_last = image.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()

# Take the first batch produced by the loader
batch_iter = iter(data_loader)
images = next(batch_iter)

# Run the batch through the autoencoder without tracking gradients
with torch.no_grad():
    encoded_images = encoder(images)
    reconstructed_images = decoder(encoded_images)

# Reconstructed images, arranged in a single grid of 1-channel 28x28 tiles
print('Reconstructed Images:')
reconstruction_grid = torchvision.utils.make_grid(reconstructed_images.view(batch_size, 1, 28, 28))
imshow(reconstruction_grid)

# Denoising Autoencoder

In [None]:
# Read in data as float64 so the noisy result can be stored without loss
clean_pixels = pd.read_csv("image_data.csv", dtype = "float64").to_numpy()

# Salt-and-pepper noise writes the extreme intensities 0 and 1, so bring the
# pixels onto a [0, 1] scale first (this is a no-op if they already span [0, 1]).
pixel_min, pixel_max = clean_pixels.min(), clean_pixels.max()
pixel_range = pixel_max - pixel_min
if pixel_range > 0:
    clean_pixels = (clean_pixels - pixel_min) / pixel_range

# Corrupt every image independently and collect the results in a NEW float array.
# Writing random_noise's float output back into the array returned by read_csv
# would truncate it whenever that array has an integer dtype.
image_data = np.stack([random_noise(row, mode = 's&p', amount = 0.1) for row in clean_pixels])

# Show the first 3 noisy images
for i in range(3):
    plt.imshow(image_data[i].reshape(28, 28))
    plt.show()

In [None]:
class Encoder(nn.Module):
    """Two-layer encoder: input -> 128 hidden units (ReLU) -> code, all float64.

    Args:
        input_dim: Number of features per flattened sample.
        output_dim: Size of the produced code.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features = input_dim, out_features = 128, dtype = torch.float64)
        self.fc2 = nn.Linear(in_features = 128, out_features = output_dim, dtype = torch.float64)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten each sample, apply the hidden layer with ReLU, then the output layer."""
        flat = x.view(x.shape[0], -1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

class Decoder(nn.Module):
    """Two-layer decoder: code -> 128 hidden units -> sigmoid output, all float64.

    NOTE(review): there is no activation between ``fc1`` and ``fc2``, so the two
    layers compose into a single affine map ahead of the sigmoid. Confirm
    whether a ReLU was intended here, as in the encoder.

    Args:
        input_dim: Size of the incoming code.
        output_dim: Number of features in the reconstruction.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features = input_dim, out_features = 128, dtype = torch.float64)
        self.fc2 = nn.Linear(in_features = 128, out_features = output_dim, dtype = torch.float64)
        self.sigmoid = nn.Sigmoid()

    def forward(self, z):
        """Apply both linear layers, then squash the result into (0, 1)."""
        hidden = self.fc1(z)
        logits = self.fc2(hidden)
        return self.sigmoid(logits)

# Dimensions: flattened 28x28 image in, 25-dimensional code out
input_dim = 28 * 28
output_dim = 25

# Build the two halves of the autoencoder; the decoder mirrors the encoder's sizes
encoder = Encoder(input_dim = input_dim, output_dim = output_dim)
decoder = Decoder(input_dim = output_dim, output_dim = input_dim)

# Training hyperparameters
lr = 1e-3         # learning rate handed to the optimizer
batch_size = 3    # samples per mini-batch
epochs = 10       # passes over the data loader

class Dataset(torch.utils.data.Dataset):
    """Min-max scaled rows of a CSV file, served one row at a time.

    The base class is spelled out as ``torch.utils.data.Dataset`` on purpose:
    ``class Dataset(Dataset)`` inherits from whatever ``Dataset`` is currently
    bound to, which after an earlier identical definition is the previous
    user-defined class rather than PyTorch's ``Dataset``.

    Args:
        csv_file: Path of the CSV file; every column is read as float64.
        transform: Optional callable applied to each row before it is returned.
    """

    def __init__(self, csv_file, transform = None):
        self.transform = transform

        raw = pd.read_csv(csv_file, dtype = "float64")
        # MinMaxScaler rescales each column independently and returns a NumPy array
        self.data = MinMaxScaler().fit_transform(raw)

    def __len__(self):
        """Number of rows in the scaled data."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return row ``idx``, passed through ``transform`` when one was given."""
        sample = self.data[idx]

        if self.transform is not None:
            sample = self.transform(sample)

        return sample

csv_file = 'image_data.csv'

# Data to tensors
transform = torch.tensor

# Data: the loader yields the CLEAN, min-max scaled images; the corruption is
# applied per batch inside the training loop so the clean batch stays available
# as the reconstruction target.
dataset = Dataset(csv_file, transform = transform)
data_loader = DataLoader(dataset, batch_size = batch_size, shuffle = False)

# Optimizer and Loss
criterion = nn.MSELoss()
optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr = lr)

# Train a denoising autoencoder: noisy input -> clean target.
# Feeding the clean batch as both input and target (as before) trains an
# ordinary autoencoder and never exposes the model to noise.
for epoch in range(epochs):
    running_loss = 0.0
    for data in data_loader:
        targets = data
        # Fresh salt-and-pepper corruption for every batch (same settings as the preview)
        noisy = random_noise(targets.numpy(), mode = 's&p', amount = 0.1)
        inputs = torch.from_numpy(noisy).to(targets.dtype)
        optimizer.zero_grad()
        encoded = encoder(inputs)
        decoded = decoder(encoded)
        loss = criterion(decoded, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(data_loader)}")
    
def imshow(image):
    """Display a channel-first image tensor with matplotlib.

    Args:
        image: Tensor laid out as (channels, height, width); it is converted to
            NumPy and its axes are reordered to (height, width, channels).
    """
    channels_last = image.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()

# First batch of clean images from the loader
images = next(iter(data_loader))

# Corrupt the batch so the model is evaluated on what a denoiser is meant to
# receive; passing the clean batch would only show plain reconstruction.
noisy_images = torch.from_numpy(
    random_noise(images.numpy(), mode = 's&p', amount = 0.1)
).to(images.dtype)

with torch.no_grad():
    encoded_images = encoder(noisy_images)
    reconstructed_images = decoder(encoded_images)

# Noisy inputs, for side-by-side comparison with the output below.
# view(-1, ...) takes the batch size from the tensor itself instead of relying
# on the global batch_size matching the fetched batch.
print('Noisy Inputs:')
imshow(torchvision.utils.make_grid(noisy_images.view(-1, 1, 28, 28)))

# Reconstructed images
print('Reconstructed Images:')
imshow(torchvision.utils.make_grid(reconstructed_images.view(-1, 1, 28, 28)))