In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [16]:
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(64, latent_dim)
        self.fc_logvar = nn.Linear(64, latent_dim)
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            nn.Sigmoid()
        )

    def encode(self, x):
        h = self.encoder(x)
        return self.fc_mu(h), self.fc_logvar(h)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        return self.decoder(z)

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

In [17]:
def loss_function(recon_x, x, mu, logvar, beta=1.0):
    recon_loss = nn.MSELoss(reduction='sum')(recon_x, x)
    kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + beta * kl_div

In [18]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

def preprocess_data(dataset_path, drop_columns=None):
    df = pd.read_csv(dataset_path)

    if drop_columns is not None:
        if not isinstance(drop_columns, (list, pd.Index)):
            raise TypeError("drop_columns must be a list or pandas.Index")
        features = df.drop(columns=drop_columns)
    else:
        features = df.copy()

    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    return pd.DataFrame(scaled_features, columns=features.columns), df

In [19]:
def create_dataloader(features, batch_size=64):
    tensor_data = torch.tensor(features.values, dtype=torch.float32)
    dataset = TensorDataset(tensor_data, tensor_data)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [20]:
def train_vae(model, dataloader, epochs=20, lr=1e-3, checkpoint_path='best_vae_model.pt'):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    best_loss = float('inf')
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch, _ in dataloader:
            optimizer.zero_grad()
            recon_batch, mu, logvar = model(batch)
            loss = loss_function(recon_batch, batch, mu, logvar)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(dataloader.dataset)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), checkpoint_path)
            print("Best model saved.")

In [21]:
def main():
    dataset_path = 'vae_train.csv'
    features, labels = preprocess_data(dataset_path)
    dataloader = create_dataloader(features)
    input_dim = features.shape[1]
    latent_dim = 2
    vae = VAE(input_dim, latent_dim)
    train_vae(vae, dataloader, epochs=30, checkpoint_path='best_vae_model.pt')
if __name__ == "__main__":
    main()

Epoch 1/30, Loss: 7.0075
Best model saved.
Epoch 2/30, Loss: 7.0000
Best model saved.
Epoch 3/30, Loss: 7.0000
Epoch 4/30, Loss: 7.0000
Epoch 5/30, Loss: 7.0000
Epoch 6/30, Loss: 7.0000
Epoch 7/30, Loss: 7.0000
Epoch 8/30, Loss: 7.0000
Epoch 9/30, Loss: 7.0000
Epoch 10/30, Loss: 7.0000
Epoch 11/30, Loss: 7.0000
Epoch 12/30, Loss: 7.0000
Epoch 13/30, Loss: 7.0000
Epoch 14/30, Loss: 7.0000
Epoch 15/30, Loss: 7.0000
Epoch 16/30, Loss: 7.0000
Epoch 17/30, Loss: 7.0000
Epoch 18/30, Loss: 7.0000
Epoch 19/30, Loss: 7.0000
Epoch 20/30, Loss: 7.0000
Epoch 21/30, Loss: 7.0000
Epoch 22/30, Loss: 7.0000
Epoch 23/30, Loss: 7.0000
Epoch 24/30, Loss: 7.0000
Epoch 25/30, Loss: 7.0000
Epoch 26/30, Loss: 7.0000
Epoch 27/30, Loss: 7.0000
Epoch 28/30, Loss: 7.0000
Epoch 29/30, Loss: 7.0000
Epoch 30/30, Loss: 7.0000
