<a href="https://colab.research.google.com/github/pmxfa/sp-shapely/blob/main/sp_timevae_exchange.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install synthcity

# Training

In [1]:
from google.colab import drive
drive.mount('/content/drive')

import sys
import warnings
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

import synthcity.logger as log
from synthcity.plugins import Plugins
from synthcity.plugins.core.dataloader import TimeSeriesDataLoader
from synthcity.utils.serialization import save_to_file, load_from_file

log.add(sink=sys.stderr, level="INFO")

Mounted at /content/drive


In [2]:
# Define file path
file_path = "/content/drive/Shareddrives/sp_env/datasets/Exchange Rate/exchange_rate.txt"

df = pd.read_csv(file_path)
print(df.head())
print(df.info())
print(df.isnull().sum())

   0.785500  1.611000  0.861698  0.634196  0.211242  0.006838  0.593000  \
0    0.7818    1.6100  0.861104  0.633513  0.211242  0.006863    0.5940   
1    0.7867    1.6293  0.861030  0.648508  0.211242  0.006975    0.5973   
2    0.7860    1.6370  0.862069  0.650618  0.211242  0.006953    0.5970   
3    0.7849    1.6530  0.861995  0.656254  0.211242  0.006940    0.5985   
4    0.7866    1.6537  0.861030  0.654879  0.211242  0.006887    0.6040   

   0.525486  
0  0.523972  
1  0.526316  
2  0.523834  
3  0.527426  
4  0.526177  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7587 entries, 0 to 7586
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   0.785500  7587 non-null   float64
 1   1.611000  7587 non-null   float64
 2   0.861698  7587 non-null   float64
 3   0.634196  7587 non-null   float64
 4   0.211242  7587 non-null   float64
 5   0.006838  7587 non-null   float64
 6   0.593000  7587 non-null   float64
 7   0.5

In [3]:
# for exchange dataset only
df.columns = [
    'Australia', 'British_Pound', 'Canada', 'Switzerland',
    'China', 'Japan', 'New_Zealand', 'Singapore'
]

# set seed and randomly choose 2 features
np.random.seed(42)

# Filter the dataframe to only include selected features
# df_latest = df_latest[selected_features]
selected_features = np.random.choice(df.columns, size=2, replace=False)
print(f"Selected features: {selected_features}")

# Immediately filter your full DataFrame down to just those two series:
df = df[selected_features]

# Set the index as a daily date range starting from 1990-01-01
df.index = pd.date_range(start='1990-01-01', periods=len(df), freq='D')

# Optional: show the first few rows
print(df.head())

Selected features: ['British_Pound' 'Japan']
            British_Pound     Japan
1990-01-01         1.6100  0.006863
1990-01-02         1.6293  0.006975
1990-01-03         1.6370  0.006953
1990-01-04         1.6530  0.006940
1990-01-05         1.6537  0.006887


In [4]:
print(len(df))

7587


In [8]:
# Keep the latest 5000 rows
df_latest = df.tail(5000)

# Train-test split: 70% for training (for TimeGAN), 30% for testing (TSTR)
train_size = int(0.7 * len(df_latest))
df_train = df_latest.iloc[:train_size]
df_test = df_latest.iloc[train_size:]  # use later for LSTM-TSTR

# Normalize the data
scaler = MinMaxScaler()
scaled_train = scaler.fit_transform(df_train)
df_scaled_train = pd.DataFrame(scaled_train, columns=df_train.columns, index=df_train.index)
scaled_test = scaler.transform(df_test)
df_scaled_test = pd.DataFrame(scaled_test, columns=df_test.columns, index=df_test.index)


# set sequence length to 30 to capture monthly patterns
# dataset = daily
sequence_length = 30

In [None]:
temporal_data = []
observation_times = []

for start in range(len(df_scaled_train) - sequence_length + 1):
    sequence = df_scaled_train.iloc[start:start + sequence_length].reset_index(drop=True)
    temporal_data.append(sequence)
    observation_times.append(list(range(sequence_length)))  # relative time within the window


# Dummy outcome for data loader
dummy_outcome = pd.DataFrame(np.zeros(len(temporal_data)), columns=["outcome"])

# --- Create DataLoader for TimeGAN ---
loader = TimeSeriesDataLoader(
    temporal_data=temporal_data,  # List of sequences (DataFrames)
    observation_times=observation_times,  # List of time indices (DataFrames)
    static_data=None,  # No static data for now (can be set if needed)
    outcome=dummy_outcome,  # Dummy outcome for forecasting
)

# Print the loader info
print(f"TimeSeriesDataLoader created with {len(temporal_data)} sequences")

In [9]:
temporal_data_test = []
observation_times_test = []

# Generate sequences from df_scaled_test only
for start in range(len(df_scaled_test) - sequence_length + 1):
    sequence = df_scaled_test.iloc[start:start + sequence_length].reset_index(drop=True)
    temporal_data_test.append(sequence)
    observation_times_test.append(list(range(sequence_length)))  # relative time within the window

# Dummy outcome for TimeGAN (can be used in DataLoader)
dummy_outcome = pd.DataFrame(np.zeros(len(temporal_data_test)), columns=["outcome"])

# Create DataLoader for TimeGAN
loader_test = TimeSeriesDataLoader(
    temporal_data=temporal_data_test,
    observation_times=observation_times_test,
    static_data=None,
    outcome=dummy_outcome,
)

# Print the loader info
print(f"TimeSeriesDataLoader TEST SET created with {len(temporal_data_test)} sequences")

TimeSeriesDataLoader TEST SET created with 1471 sequences


In [None]:
print(len(df_scaled_test))  # Check the length of the dataframe
print(loader.dataframe())

3500
        seq_id  seq_time_id  seq_temporal_British_Pound  seq_temporal_Japan  \
0            0            0                    0.175634            0.327109   
1            0            1                    0.177668            0.321058   
2            0            2                    0.168709            0.315871   
3            0            3                    0.163281            0.314661   
4            0            4                    0.165788            0.318638   
...        ...          ...                         ...                 ...   
104125    3470           25                    0.325966            0.882434   
104126    3470           26                    0.327503            0.875692   
104127    3470           27                    0.313072            0.865318   
104128    3470           28                    0.312271            0.866010   
104129    3470           29                    0.305651            0.874308   

        seq_out_outcome  
0                   

All available hyperparameters are listed below:

model = plugin_timegan.TimeGAN( n_iter=500, generator_n_layers_hidden=3, generator_n_units_hidden=100, generator_nonlin="relu", generator_dropout=0.1, discriminator_n_layers_hidden=3, discriminator_n_units_hidden=100, discriminator_nonlin="leaky_relu", discriminator_dropout=0.1, discriminator_n_iter=2, lr=0.0002, weight_decay=0.0001, batch_size=200, encoder_max_clusters=10, mode="LSTM", gamma_penalty=10.0, moments_penalty=10.0, embedding_penalty=5.0 )

In [None]:
syn_model = Plugins().get("timevae")

[2025-05-01T07:02:34.357757+0000][1547][CRITICAL] module disabled: /usr/local/lib/python3.11/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2025-05-01T07:02:34.357757+0000][1547][CRITICAL] module disabled: /usr/local/lib/python3.11/dist-packages/synthcity/plugins/generic/plugin_goggle.py


In [None]:
# Print all parameters of initialized model
for attr in dir(syn_model):
    if not attr.startswith("_") and not callable(getattr(syn_model, attr)):
        print(f"{attr}: {getattr(syn_model, attr)}")

batch_size: 64
class_name: TimeVAEPlugin
clipping_value: 0
compress_dataset: False
decoder_batch_norm: False
decoder_dropout: 0.01
decoder_n_layers_hidden: 2
decoder_n_units_hidden: 150
decoder_nonlin: leaky_relu
decoder_nonlin_out_continuous: tanh
decoder_nonlin_out_discrete: softmax
decoder_residual: True
device: cuda
embedding_penalty: 10
encoder: None
encoder_batch_norm: False
encoder_dropout: 0.1
encoder_max_clusters: 20
encoder_n_layers_hidden: 3
encoder_n_units_hidden: 300
encoder_nonlin: leaky_relu
expecting_conditional: False
fitted: False
gamma_penalty: 1
lr: 0.001
mode: LSTM
module_name: synthcity.plugins.time_series.plugin_timevae
module_relative_path: ../time_series/plugin_timevae.py
moments_penalty: 100
n_iter: 1000
n_iter_print: 10
outcome_encoder: TabularEncoder(cat_encoder_params={'handle_unknown': 'ignore',
                                   'sparse_output': False},
               categorical_encoder='onehot',
               cont_encoder_params={'n_components': 20},
 

## fitting the model

In [None]:
print(loader.shape)

# Train the model
syn_model.fit(loader)

save_to_file('/content/drive/Shareddrives/sp_env/saved_models/VAE_Exchange.pkl', syn_model)

(104130, 5)


In [None]:
# --- Generate Synthetic Data ---
n_samples = len(temporal_data)
syn_data = syn_model.generate(count=n_samples)
print(syn_data.shape)

(104130, 5)


In [None]:
# --- Save with automated format ---
import datetime
import os
# Get the current date and time
now = datetime.datetime.now()
timestamp = now.strftime("%m%d%y-%H%M%S")  # MMDDYY-HHMMSS format

# Define the base directory
base_dir = "/content/drive/Shareddrives/sp_env/synthetic_datasets/TimeVAE/exchange"  #CHANGE THIS
if not os.path.exists(base_dir):
    os.makedirs(base_dir)

# Construct the filename
model_name = type(syn_model).__name__.lower() # Get model name dynamically
filename = f"{timestamp}-{model_name}-3500.csv"
filepath = os.path.join(base_dir, filename)

# Save the data
df_syn = syn_data.dataframe()
df_syn.to_csv(filepath, index=False)

print(f"Synthetic data saved to: {filepath}")

Synthetic data saved to: /content/drive/Shareddrives/sp_env/synthetic_datasets/TimeVAE/exchange/050125-072914-timevaeplugin-3500.csv


# Evaluation

## Prerequisites

In [10]:
syn_data = pd.read_csv('/content/drive/Shareddrives/sp_env/synthetic_datasets/TimeVAE/exchange/050125-072914-timevaeplugin-3500.csv')

In [12]:
selected_columns = ['seq_temporal_British_Pound','seq_temporal_Japan']
# Ensure real_data and synthetic_data only contain the selected columns
real_data = loader_test.dataframe()[selected_columns].to_numpy()
synthetic_data = syn_data[selected_columns].to_numpy()

In [13]:
print(real_data, "\n ------------------------------------------------------- \n", synthetic_data)
print(type(real_data),type(synthetic_data))
print(real_data.shape,synthetic_data.shape)

[[ 0.30750644  0.87102351]
 [ 0.3059454   0.88191563]
 [ 0.3059454   0.88520055]
 ...
 [-0.18759935  0.197787  ]
 [-0.18787898  0.19657676]
 [-0.18787898  0.19657676]] 
 ------------------------------------------------------- 
 [[0.89194132 0.94418034]
 [0.35438725 0.63338469]
 [0.18833139 0.83318373]
 ...
 [0.44900523 0.29225317]
 [0.44900523 0.57334021]
 [0.23076167 0.57334021]]
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(44130, 2) (104130, 2)


## Generate distance metrics

### Helper Functions

In [14]:
from scipy.stats import wasserstein_distance, entropy
import numpy as np

def compute_wasserstein(real_data, synthetic_data, selected_columns):
    """
    Computes Wasserstein Distance between real and synthetic time-series data.

    """

    # Ensure both datasets have the same number of samples
    min_length = min(len(real_data), len(synthetic_data))
    real_trimmed = real_data[:min_length]  # Keep original order (no random sampling)
    synthetic_trimmed = synthetic_data[:min_length]  # Match size
    print(real_trimmed.shape,synthetic_trimmed.shape)

    wasserstein_results = {}

    # Compute Wasserstein Distance for each feature
    for i, col in enumerate(selected_columns):
        w_dist = wasserstein_distance(real_trimmed[:, i], synthetic_trimmed[:, i])
        wasserstein_results[col] = w_dist
        print(f"{w_dist}")

    return wasserstein_results

def compute_kl_divergence(real_data, synthetic_data, selected_columns, bins=50):
    """
    Computes KL Divergence between real and synthetic time-series data.

    """

    # Ensure both datasets have the same number of samples
    min_length = min(len(real_data), len(synthetic_data))
    real_trimmed = real_data[:min_length]  # Keep original order
    synthetic_trimmed = synthetic_data[:min_length]  # Match size

    kl_results = {}

    for i, col in enumerate(selected_columns):
        # Compute histogram-based probability distributions
        real_hist, _ = np.histogram(real_trimmed[:, i], bins=bins, density=True)
        synth_hist, _ = np.histogram(synthetic_trimmed[:, i], bins=bins, density=True)

        # Avoid zero probabilities (KL Divergence is undefined for zero values)
        real_hist += 1e-10
        synth_hist += 1e-10

        # Compute KL Divergence
        kl_div = entropy(real_hist, synth_hist)
        kl_results[col] = kl_div
        print(f"{kl_div}")

    return kl_results

### Generate Metrics

In [15]:
wasserstein_results = compute_wasserstein(real_data, synthetic_data, selected_columns)
print("Wasserstein Distance Results:")
print(wasserstein_results)

kl_results = compute_kl_divergence(real_data, synthetic_data, selected_columns)
print("KL Divergence Results:")
print(kl_results)

(44130, 2) (44130, 2)
0.21537685601539203
0.14475753302922684
Wasserstein Distance Results:
{'seq_temporal_British_Pound': 0.21537685601539203, 'seq_temporal_Japan': 0.14475753302922684}
17.01785977924572
16.084912038127758
KL Divergence Results:
{'seq_temporal_British_Pound': 17.01785977924572, 'seq_temporal_Japan': 16.084912038127758}


# LSTM downstream

In [16]:
real_data = loader_test.dataframe()
df_synth = pd.read_csv("/content/drive/Shareddrives/sp_env/synthetic_datasets/TimeVAE/exchange/050125-072914-timevaeplugin-3500.csv")

# drop unwanted column
real_data = real_data.drop(columns=["seq_id", "seq_time_id", "seq_out_outcome"], errors="ignore")
df_synth = df_synth.drop(columns=["seq_id", "seq_time_id", "seq_out_outcome"], errors="ignore")

print(f"real_data: {real_data.shape}, synthetic_data: {df_synth.shape}")

real_data: (44130, 2), synthetic_data: (104130, 2)


In [17]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [18]:
# Convert to tensors (float32 for PyTorch)
data_real = torch.tensor(real_data.values, dtype=torch.float32)
data_synth = torch.tensor(df_synth.values, dtype=torch.float32)

#  Sequence builder
def make_sequences(data, seq_len):
    X, y = [], []
    for i in range(len(data) - seq_len):
        X.append(data[i:i+seq_len])
        y.append(data[i+seq_len])
    return torch.stack(X), torch.stack(y)

SEQ_LEN = sequence_length

# Sequences for synthetic (train)
X_train, y_train = make_sequences(data_synth, SEQ_LEN)
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)

# Sequences for real (test)
X_test, y_test = make_sequences(data_real, SEQ_LEN)

In [19]:
# ─── Model Definition ──────────────────────────────────────
class ShallowLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=64):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, input_size)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)  # hn shape: (1, batch, hidden_size)
        out = self.linear(hn.squeeze(0))  # squeeze to (batch, hidden_size)
        return out


# ─── Model Init ─────────────────────────────────────────────
model = ShallowLSTM(input_size=X_train.shape[2], hidden_size=64)

# ─── Optimizer & Loss ───────────────────────────────────────
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

In [20]:
# ─── Training ───────────────────────────────────────────────
EPOCHS = 50
for epoch in range(1, EPOCHS + 1):
    model.train()
    for xb, yb in train_loader:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # if epoch % 10 == 0 or epoch == 1:
    print(f"Epoch {epoch}: Train MSE = {loss.item():.6f}")

Epoch 1: Train MSE = 0.051888
Epoch 2: Train MSE = 0.025994
Epoch 3: Train MSE = 0.069100
Epoch 4: Train MSE = 0.108610
Epoch 5: Train MSE = 0.053252
Epoch 6: Train MSE = 0.113041
Epoch 7: Train MSE = 0.106269
Epoch 8: Train MSE = 0.053315
Epoch 9: Train MSE = 0.069478
Epoch 10: Train MSE = 0.102155
Epoch 11: Train MSE = 0.047958
Epoch 12: Train MSE = 0.076143
Epoch 13: Train MSE = 0.084339
Epoch 14: Train MSE = 0.041524
Epoch 15: Train MSE = 0.079919
Epoch 16: Train MSE = 0.075968
Epoch 17: Train MSE = 0.084748
Epoch 18: Train MSE = 0.111051
Epoch 19: Train MSE = 0.086398
Epoch 20: Train MSE = 0.093235
Epoch 21: Train MSE = 0.044364
Epoch 22: Train MSE = 0.057127
Epoch 23: Train MSE = 0.031280
Epoch 24: Train MSE = 0.121597
Epoch 25: Train MSE = 0.067504
Epoch 26: Train MSE = 0.043946
Epoch 27: Train MSE = 0.077169
Epoch 28: Train MSE = 0.056605
Epoch 29: Train MSE = 0.063538
Epoch 30: Train MSE = 0.079572
Epoch 31: Train MSE = 0.061354
Epoch 32: Train MSE = 0.074685
Epoch 33: Train M

In [21]:
MODEL_SAVE_PATH = '/content/drive/Shareddrives/sp_env/saved_models/LSTM/tstr_VAE_exchange.pth'

# Save only the model's learned parameters (state_dict)
torch.save(model.state_dict(), MODEL_SAVE_PATH)

print(f"Model saved to: {MODEL_SAVE_PATH}")

Model saved to: /content/drive/Shareddrives/sp_env/saved_models/LSTM/tstr_VAE_exchange.pth


In [22]:
#@title ✧.* model evaluation ✧.*
model.eval()
with torch.no_grad():
    preds = model(X_test)
    test_mse = loss_fn(preds, y_test).item()
    test_mae = mean_absolute_error(y_test.numpy(), preds.numpy())

    print(f"Test MSE: {test_mse:.6f}")
    print(f"Test MAE: {test_mae:.6f}")

Test MSE: 0.057347
Test MAE: 0.191393


In [None]:
!pip install tsbootstrap

In [23]:
len(loader_test)

44130

In [26]:
from tsbootstrap import MovingBlockBootstrap
import numpy as np

C_CONSTANT = 1
n = len(loader_test)
calculated_block_length = int(np.round(C_CONSTANT * (n**(1/3))))
print(calculated_block_length)

bootstrap_configs = {
    "weather": {"block_length": calculated_block_length, "n_bootstraps": 15, "rng": 42},       # 6-hour pattern (10-min interval)
    "electricity": {"block_length": calculated_block_length, "n_bootstraps": 15, "rng": 42},   # 1-day pattern (hourly)
    "exchange": {"block_length": calculated_block_length, "n_bootstraps": 15, "rng": 42},      # 1-month pattern (daily)
}

# Example for weather
dataset_name = "weather"
config = bootstrap_configs[dataset_name]

real_test_array = real_data # shape (N, features)
mbb = MovingBlockBootstrap(
    n_bootstraps=config["n_bootstraps"],
    rng=config["rng"],
    block_length=config["block_length"]
)
boot_samples = mbb.bootstrap(real_test_array, return_indices=False)


35


In [27]:
model.load_state_dict(torch.load(MODEL_SAVE_PATH))

<All keys matched successfully>

In [28]:
bootstrap_results = []

for b_idx, boot_real in enumerate(boot_samples):
    # 1. Match the synthetic data size
    syn_trimmed = synthetic_data[:len(boot_real)]

    # 2. Fidelity metrics
    wasserstein = compute_wasserstein(boot_real, syn_trimmed, selected_columns)
    kl = compute_kl_divergence(boot_real, syn_trimmed, selected_columns)

    # 3. Utility metrics
    # Preprocess this bootstrap sample for LSTM (as you do with real_data)
    boot_tensor = torch.tensor(boot_real, dtype=torch.float32)
    Xb_test, yb_test = make_sequences(boot_tensor, SEQ_LEN)

    model.eval()
    with torch.no_grad():
        preds = model(Xb_test)
        mse = mean_squared_error(yb_test.numpy(), preds.numpy())
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(yb_test.numpy(), preds.numpy())

    # 4. Store results
    bootstrap_results.append({
        'bootstrap': b_idx,
        'wasserstein': np.mean(list(wasserstein.values())),
        'kl': np.mean(list(kl.values())),
        'rmse': rmse,
        'mae': mae
    })

(44130, 2) (44130, 2)
0.21770328632170077
0.1448137899837803
16.85052690518597
16.058477603997716
(44130, 2) (44130, 2)
0.2182330815362547
0.14489440741049867
16.98792292602977
15.875836019108954
(44130, 2) (44130, 2)
0.218552476917958
0.14476726273109575
16.913053911440617
15.99586001670041
(44130, 2) (44130, 2)
0.20300957061803743
0.14458687028919529
17.31777220243694
16.15102051441206
(44130, 2) (44130, 2)
0.21276826948764369
0.1412879270670639
17.127523270125195
16.279897165076957
(44130, 2) (44130, 2)
0.21578306552301132
0.14715958202170018
17.006549313854315
15.971226670237446
(44130, 2) (44130, 2)
0.21829672586634402
0.14734800425957617
16.742045323541134
16.313229016586025
(44130, 2) (44130, 2)
0.21544128917975147
0.14493229780863226
17.170707923632932
16.12495257921139
(44130, 2) (44130, 2)
0.2195414003772076
0.14973988716929285
16.949080592448357
16.109059291784053
(44130, 2) (44130, 2)
0.2071759176868616
0.14981170349233144
17.29608776222599
16.122809114475334
(44130, 2) (44

In [29]:
print(bootstrap_results)

# Assuming bootstrap_results is your list of dicts
df_results = pd.DataFrame(bootstrap_results)

df_results['Dataset'] = 'exchange'
df_results['Model'] = 'TimeVAE'

df_results

[{'bootstrap': 0, 'wasserstein': 0.18125853815274054, 'kl': 16.454502254591844, 'rmse': 0.23948624274535438, 'mae': 0.19181044399738312}, {'bootstrap': 1, 'wasserstein': 0.1815637444733767, 'kl': 16.431879472569364, 'rmse': 0.24013693314240617, 'mae': 0.1910630464553833}, {'bootstrap': 2, 'wasserstein': 0.18165986982452687, 'kl': 16.454456964070513, 'rmse': 0.2410412086992411, 'mae': 0.19348469376564026}, {'bootstrap': 3, 'wasserstein': 0.17379822045361637, 'kl': 16.7343963584245, 'rmse': 0.23078483791829438, 'mae': 0.1844160258769989}, {'bootstrap': 4, 'wasserstein': 0.17702809827735377, 'kl': 16.703710217601078, 'rmse': 0.2362568803067926, 'mae': 0.18900267779827118}, {'bootstrap': 5, 'wasserstein': 0.18147132377235575, 'kl': 16.488887992045882, 'rmse': 0.2425638320569777, 'mae': 0.19411256909370422}, {'bootstrap': 6, 'wasserstein': 0.1828223650629601, 'kl': 16.527637170063578, 'rmse': 0.2435247587951548, 'mae': 0.19343525171279907}, {'bootstrap': 7, 'wasserstein': 0.1801867934941918

Unnamed: 0,bootstrap,wasserstein,kl,rmse,mae,Dataset,Model
0,0,0.181259,16.454502,0.239486,0.19181,exchange,TimeVAE
1,1,0.181564,16.431879,0.240137,0.191063,exchange,TimeVAE
2,2,0.18166,16.454457,0.241041,0.193485,exchange,TimeVAE
3,3,0.173798,16.734396,0.230785,0.184416,exchange,TimeVAE
4,4,0.177028,16.70371,0.236257,0.189003,exchange,TimeVAE
5,5,0.181471,16.488888,0.242564,0.194113,exchange,TimeVAE
6,6,0.182822,16.527637,0.243525,0.193435,exchange,TimeVAE
7,7,0.180187,16.64783,0.241794,0.194159,exchange,TimeVAE
8,8,0.184641,16.52907,0.241549,0.194014,exchange,TimeVAE
9,9,0.178494,16.709448,0.235,0.188665,exchange,TimeVAE


In [30]:
summary_row = {
    'Dataset': 'exchange',
    'Model': 'TimeVAE',
    'Wasserstein': df_results['wasserstein'].mean(),
    'KL': df_results['kl'].mean(),
    'RMSE': df_results['rmse'].mean(),
    'MAE': df_results['mae'].mean()
}

df_summary = pd.DataFrame([summary_row])
print(df_summary)

    Dataset    Model  Wasserstein         KL      RMSE       MAE
0  exchange  TimeVAE     0.180719  16.533493  0.240132  0.191968
