In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

np.random.seed(42)

iris = load_iris()
X = iris.data
y = iris.target

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test = train_test_split(X_scaled, test_size=0.2, random_state=42)

input_dim = X_train.shape[1]
hidden_dim = 8
latent_dim = 2

def sigmoid(x):
    return 1 / (1 + np.exp(-np.clip(x, -15, 15)))

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

class VariationalAutoencoder:
    def __init__(self, input_dim, hidden_dim, latent_dim, learning_rate=0.001):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.learning_rate = learning_rate
        
        self.W1 = np.random.randn(input_dim, hidden_dim) * 0.01
        self.b1 = np.zeros((1, hidden_dim))
        
        self.W_mean = np.random.randn(hidden_dim, latent_dim) * 0.01
        self.b_mean = np.zeros((1, latent_dim))
        self.W_logvar = np.random.randn(hidden_dim, latent_dim) * 0.01
        self.b_logvar = np.zeros((1, latent_dim))
        
        self.W3 = np.random.randn(latent_dim, hidden_dim) * 0.01
        self.b3 = np.zeros((1, hidden_dim))
        self.W4 = np.random.randn(hidden_dim, input_dim) * 0.01
        self.b4 = np.zeros((1, input_dim))
    
    def encode(self, X):
        self.h1 = relu(np.dot(X, self.W1) + self.b1)
        self.z_mean = np.dot(self.h1, self.W_mean) + self.b_mean
        self.z_logvar = np.dot(self.h1, self.W_logvar) + self.b_logvar
        
        epsilon = np.random.normal(size=(len(X), self.latent_dim))
        self.z = self.z_mean + np.exp(0.5 * self.z_logvar) * epsilon
        
        return self.z, self.z_mean, self.z_logvar
    
    def decode(self, z):
        self.h3 = relu(np.dot(z, self.W3) + self.b3)
        self.reconstruction = sigmoid(np.dot(self.h3, self.W4) + self.b4)
        
        return self.reconstruction
    
    def forward(self, X):
        self.z, self.z_mean, self.z_logvar = self.encode(X)
        self.reconstruction = self.decode(self.z)
        
        return self.reconstruction
    
    def loss_function(self, X, reconstruction):
        reconstruction_loss = -np.sum(X * np.log(reconstruction + 1e-10) + 
                                     (1 - X) * np.log(1 - reconstruction + 1e-10), axis=1)
        
        kl_loss = -0.5 * np.sum(1 + self.z_logvar - np.square(self.z_mean) - np.exp(self.z_logvar), axis=1)
        
        total_loss = np.mean(reconstruction_loss + kl_loss)
        
        return total_loss, reconstruction_loss, kl_loss
    
    def backward(self, X):
        m = X.shape[0]
        
        drec = -(X / (self.reconstruction + 1e-10) - (1 - X) / (1 - self.reconstruction + 1e-10)) / m
        
        dW4 = np.dot(self.h3.T, drec)
        db4 = np.sum(drec, axis=0, keepdims=True)
        dh3 = np.dot(drec, self.W4.T)
        dh3_input = dh3 * relu_derivative(self.h3)
        
        dW3 = np.dot(self.z.T, dh3_input)
        db3 = np.sum(dh3_input, axis=0, keepdims=True)
        dz = np.dot(dh3_input, self.W3.T)
        
        dz_mean = self.z_mean / m
        dz_logvar = (-0.5 + 0.5 * np.exp(self.z_logvar)) / m
        
        dz_mean += dz
        dz_logvar += dz * np.exp(0.5 * self.z_logvar) * 0.5
        
        dW_mean = np.dot(self.h1.T, dz_mean)
        db_mean = np.sum(dz_mean, axis=0, keepdims=True)
        dW_logvar = np.dot(self.h1.T, dz_logvar)
        db_logvar = np.sum(dz_logvar, axis=0, keepdims=True)
        
        dh1 = np.dot(dz_mean, self.W_mean.T) + np.dot(dz_logvar, self.W_logvar.T)
        dh1_input = dh1 * relu_derivative(self.h1)
        
        dW1 = np.dot(X.T, dh1_input)
        db1 = np.sum(dh1_input, axis=0, keepdims=True)
        
        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1
        self.W_mean -= self.learning_rate * dW_mean
        self.b_mean -= self.learning_rate * db_mean
        self.W_logvar -= self.learning_rate * dW_logvar
        self.b_logvar -= self.learning_rate * db_logvar
        self.W3 -= self.learning_rate * dW3
        self.b3 -= self.learning_rate * db3
        self.W4 -= self.learning_rate * dW4
        self.b4 -= self.learning_rate * db4
    
    def train(self, X, epochs=100, batch_size=32, verbose=True):
        losses = []
        num_samples = X.shape[0]
        num_batches = num_samples // batch_size
        
        for epoch in range(epochs):
            epoch_loss = 0
            indices = np.random.permutation(num_samples)
            X_shuffled = X[indices]
            
            for i in range(num_batches):
                start_idx = i * batch_size
                end_idx = min((i + 1) * batch_size, num_samples)
                batch_X = X_shuffled[start_idx:end_idx]
                
                reconstructions = self.forward(batch_X)
                
                loss, _, _ = self.loss_function(batch_X, reconstructions)
                epoch_loss += loss
                
                self.backward(batch_X)
            
            avg_loss = epoch_loss / num_batches
            losses.append(avg_loss)
            
            if verbose and (epoch + 1) % 100 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}")
        
        return losses
    
    def generate_samples(self, num_samples=1):
        z_sample = np.random.normal(0, 1, size=(num_samples, self.latent_dim))
        samples = self.decode(z_sample)
        return samples
    
    def predict(self, input_data):
        input_scaled = scaler.transform(input_data.reshape(1, -1))
        _, z_mean, _ = self.encode(input_scaled)
        reconstruction = self.decode(z_mean)
        reconstruction_original = scaler.inverse_transform(reconstruction)
        return reconstruction_original

vae = VariationalAutoencoder(input_dim=input_dim, hidden_dim=hidden_dim, latent_dim=latent_dim)
losses = vae.train(X_train, epochs=10000, batch_size=16)

num_synthetic_samples = 5
synthetic_samples = vae.generate_samples(num_synthetic_samples)
synthetic_samples_original = scaler.inverse_transform(synthetic_samples)

print("Generated Synthetic IRIS Samples:")
for i, sample in enumerate(synthetic_samples_original):
    print(f"Sample {i+1}: Sepal Length: {sample[0]:.2f}, Sepal Width: {sample[1]:.2f}, Petal Length: {sample[2]:.2f}, Petal Width: {sample[3]:.2f}")

Epoch 100/10000, Loss: 2.7514
Epoch 200/10000, Loss: 2.7409
Epoch 300/10000, Loss: 2.7448
Epoch 400/10000, Loss: 2.7429
Epoch 500/10000, Loss: 2.7444
Epoch 600/10000, Loss: 2.7443
Epoch 700/10000, Loss: 2.7493
Epoch 800/10000, Loss: 2.7495
Epoch 900/10000, Loss: 2.7489
Epoch 1000/10000, Loss: 2.7442
Epoch 1100/10000, Loss: 2.7485
Epoch 1200/10000, Loss: 2.7402
Epoch 1300/10000, Loss: 2.7379
Epoch 1400/10000, Loss: 2.7564
Epoch 1500/10000, Loss: 2.7448
Epoch 1600/10000, Loss: 2.7446
Epoch 1700/10000, Loss: 2.7443
Epoch 1800/10000, Loss: 2.7438
Epoch 1900/10000, Loss: 2.7503
Epoch 2000/10000, Loss: 2.7523
Epoch 2100/10000, Loss: 2.7410
Epoch 2200/10000, Loss: 2.7500
Epoch 2300/10000, Loss: 2.7467
Epoch 2400/10000, Loss: 2.7468
Epoch 2500/10000, Loss: 2.7485
Epoch 2600/10000, Loss: 2.7471
Epoch 2700/10000, Loss: 2.7436
Epoch 2800/10000, Loss: 2.7421
Epoch 2900/10000, Loss: 2.7470
Epoch 3000/10000, Loss: 2.7497
Epoch 3100/10000, Loss: 2.7481
Epoch 3200/10000, Loss: 2.7384
Epoch 3300/10000,