In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [2]:
def circle(r, θ):
    x = r * np.cos(θ)
    y = r * np.sin(θ)
    return x, y

In [3]:
train = []
for i in range(1000):
    θ = np.random.uniform(0, 2*np.pi)
    r = np.random.randn()
    x, y = (circle(r, θ))
    train.append([x, y])
train[0:5]

[[-0.008842531205746114, 0.02761907014736295],
 [1.5986899082860075, -0.5896486210070966],
 [-0.0570285556530223, -0.07346823836375693],
 [-0.008302338999917608, 0.09100010524136315],
 [-0.5349208899207861, -0.218688533467988]]

In [4]:
train = np.array(train)
train

array([[-0.00884253,  0.02761907],
       [ 1.59868991, -0.58964862],
       [-0.05702856, -0.07346824],
       ...,
       [ 0.63993958, -0.43692145],
       [-0.09113293, -0.19439704],
       [-0.4786428 , -0.10114104]])

In [5]:
def PCA_train(train):
    scaled_train = StandardScaler().fit_transform(train)
    pca = PCA(n_components=1)
    pca.fit(scaled_train)
    train_pca = pca.transform(scaled_train)
    inversed = pca.inverse_transform(train_pca)

    mse = ((train - inversed) ** 2).mean(axis=None)
    return mse

<img src="./autoencoder.png" height="768" width="1024">

In [35]:
import numpy as np

def tanh(x):
    return np.tanh(x)

def encoder(data, weights, bias):
    return tanh(np.dot(data, weights) + bias)

def decoder(data, weights, bias):
    return tanh(np.dot(data, weights) + bias)

input_dim = train.shape[1]   # 2
latent_dim1 = 1              # first bottleneck
latent_dim2 = 1              # hidden layer (optional deeper level)

# Encoder weights
we1 = np.random.randn(input_dim, latent_dim1)     # (2,1)
be1 = np.random.randn(latent_dim1)

we2 = np.random.randn(latent_dim1, latent_dim2)   # (1,1)
be2 = np.random.randn(latent_dim2)

# Decoder weights
wd1 = np.random.randn(latent_dim2, latent_dim1)   # (1,1)
bd1 = np.random.randn(latent_dim1)

wd2 = np.random.randn(latent_dim1, input_dim)     # (1,2)
bd2 = np.random.randn(input_dim)

def forward_pass(train_data, we1, be1, we2, be2, wd1, bd1, wd2, bd2):
    print("Input shape:", train_data.shape)
    z1 = encoder(train_data, we1, be1)   # (1000, 1)
    print("Encoder 1 shape:", z1.shape)
    z2 = encoder(z1, we2, be2)           # (1000, 1)
    print("Encoder 2 shape:", z2.shape)
    z3 = encoder(z2, wd1, bd1)           # (1000, 1)
    print("Bottleneck shape:", z3.shape)
    output = decoder(z3, wd2, bd2)       # (1000, 2)
    print("Output shape:", output.shape)

    mse = ((train_data - output) ** 2).mean()
    return mse

In [36]:
print(f'MSE (Autoencoder): {forward_pass(train, we1, be1, we2, be2, wd1, bd1, wd2, bd2)}')
print(f'MSE (PCA): {PCA_train(train)}')

Input shape: (1000, 2)
Encoder 1 shape: (1000, 1)
Encoder 2 shape: (1000, 1)
Bottleneck shape: (1000, 1)
Output shape: (1000, 2)
MSE (Autoencoder): 0.8904275102325788
MSE (PCA): 0.2859627482660504


In [8]:
import numpy as np

# ----------------------------- Activation & Derivative -----------------------------
def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x) ** 2

# ----------------------------- Forward & Backward Functions -----------------------------
def forward(data, we1, be1, we2, be2, wd1, bd1, wd2, bd2):
    # Forward pass
    z1 = np.dot(data, we1) + be1
    a1 = tanh(z1)
    
    z2 = np.dot(a1, we2) + be2
    a2 = tanh(z2)
    
    z3 = np.dot(a2, wd1) + bd1
    a3 = tanh(z3)
    
    z4 = np.dot(a3, wd2) + bd2
    output = tanh(z4)
    
    return z1, a1, z2, a2, z3, a3, z4, output

def compute_loss(y_true, y_pred):
    return ((y_true - y_pred) ** 2).mean()

# ----------------------------- Training Function -----------------------------
def train_autoencoder(train_data, lr=0.01, epochs=1000):
    input_dim = train_data.shape[1]
    latent_dim1 = 1
    latent_dim2 = 1
    
    # Initialize weights and biases
    we1 = np.random.randn(input_dim, latent_dim1)
    be1 = np.zeros(latent_dim1)

    we2 = np.random.randn(latent_dim1, latent_dim2)
    be2 = np.zeros(latent_dim2)

    wd1 = np.random.randn(latent_dim2, latent_dim1)
    bd1 = np.zeros(latent_dim1)

    wd2 = np.random.randn(latent_dim1, input_dim)
    bd2 = np.zeros(input_dim)
    
    for epoch in range(epochs):
        # Forward
        z1, a1, z2, a2, z3, a3, z4, output = forward(train_data, we1, be1, we2, be2, wd1, bd1, wd2, bd2)
        
        # Loss
        loss = compute_loss(train_data, output)
        
        # --------------------- Backward Pass ---------------------
        # Output layer
        d_output = 2 * (output - train_data) * tanh_derivative(z4)      # (N, 2)
        d_wd2 = np.dot(a3.T, d_output) / len(train_data)
        d_bd2 = d_output.mean(axis=0)

        # Decoder1
        d_a3 = np.dot(d_output, wd2.T) * tanh_derivative(z3)           # (N, 1)
        d_wd1 = np.dot(a2.T, d_a3) / len(train_data)
        d_bd1 = d_a3.mean(axis=0)

        # Encoder2
        d_a2 = np.dot(d_a3, wd1.T) * tanh_derivative(z2)               # (N, 1)
        d_we2 = np.dot(a1.T, d_a2) / len(train_data)
        d_be2 = d_a2.mean(axis=0)

        # Encoder1
        d_a1 = np.dot(d_a2, we2.T) * tanh_derivative(z1)               # (N, 2)
        d_we1 = np.dot(train_data.T, d_a1) / len(train_data)
        d_be1 = d_a1.mean(axis=0)
        
        # --------------------- Parameter Update ---------------------
        we1 -= lr * d_we1
        be1 -= lr * d_be1
        we2 -= lr * d_we2
        be2 -= lr * d_be2
        wd1 -= lr * d_wd1
        bd1 -= lr * d_bd1
        wd2 -= lr * d_wd2
        bd2 -= lr * d_bd2

        # Optional: Print loss
        if epoch % 100 == 0 or epoch == epochs - 1:
            print(f"Epoch {epoch+1}/{epochs} - Loss: {loss:.6f}")

    return we1, be1, we2, be2, wd1, bd1, wd2, bd2

In [9]:
trained_params = train_autoencoder(train, lr=0.05, epochs=1000)

Epoch 1/1000 - Loss: 0.643577
Epoch 101/1000 - Loss: 0.311584
Epoch 201/1000 - Loss: 0.305163
Epoch 301/1000 - Loss: 0.300358
Epoch 401/1000 - Loss: 0.295409
Epoch 501/1000 - Loss: 0.290490
Epoch 601/1000 - Loss: 0.285871
Epoch 701/1000 - Loss: 0.281734
Epoch 801/1000 - Loss: 0.278232
Epoch 901/1000 - Loss: 0.275460
Epoch 1000/1000 - Loss: 0.273382


In [10]:
print(f'MSE (Autoencoder): {forward_pass(train, *trained_params)}')
print(f'MSE (PCA): {PCA_train(train)}')

MSE (Autoencoder): 0.273363818066893
MSE (PCA): 0.2859627482660504
