<a href="https://colab.research.google.com/github/pmxfa/sp-shapely/blob/main/sp_timevae_electricity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training

In [None]:
!pip install synthcity tsbootstrap

In [1]:
from google.colab import drive
drive.mount('/content/drive')

import sys
import warnings
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

import synthcity.logger as log
from synthcity.plugins import Plugins
from synthcity.plugins.core.dataloader import TimeSeriesDataLoader
from synthcity.utils.serialization import save_to_file, load_from_file

log.add(sink=sys.stderr, level="INFO")

Mounted at /content/drive


In [2]:
# Define file path
file_path = "/content/drive/Shareddrives/sp_env/datasets/Electricity Transformer Dataset (ETDataset)/ETTh1.csv"

df = pd.read_csv(file_path)
print(df.head())
print(df.info())
print(df.isnull().sum())

                  date   HUFL   HULL   MUFL   MULL   LUFL   LULL         OT
0  2016-07-01 00:00:00  5.827  2.009  1.599  0.462  4.203  1.340  30.531000
1  2016-07-01 01:00:00  5.693  2.076  1.492  0.426  4.142  1.371  27.787001
2  2016-07-01 02:00:00  5.157  1.741  1.279  0.355  3.777  1.218  27.787001
3  2016-07-01 03:00:00  5.090  1.942  1.279  0.391  3.807  1.279  25.044001
4  2016-07-01 04:00:00  5.358  1.942  1.492  0.462  3.868  1.279  21.948000
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17420 entries, 0 to 17419
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    17420 non-null  object 
 1   HUFL    17420 non-null  float64
 2   HULL    17420 non-null  float64
 3   MUFL    17420 non-null  float64
 4   MULL    17420 non-null  float64
 5   LUFL    17420 non-null  float64
 6   LULL    17420 non-null  float64
 7   OT      17420 non-null  float64
dtypes: float64(7), object(1)
memory usage: 1.1+ MB
None
date    0

In [3]:
# Convert 'date' to datetime, set as index, and sort
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)
df.sort_index(inplace=True)

# Keep the latest 5000 rows
df_latest = df.tail(5000)

# Train-test split: 70% for training (for TimeGAN), 30% for testing (TSTR)
train_size = int(0.7 * len(df_latest))
df_train = df_latest.iloc[:train_size]
df_test = df_latest.iloc[train_size:]  # use later for LSTM-TSTR

# Normalize the data
scaler = MinMaxScaler()
scaled_train = scaler.fit_transform(df_train)
df_scaled_train = pd.DataFrame(scaled_train, columns=df_train.columns, index=df_train.index)
scaled_test = scaler.transform(df_test)
df_scaled_test = pd.DataFrame(scaled_test, columns=df_test.columns, index=df_test.index)

# Sequence length for time-series data (dataset = hourly; 24 hours)
sequence_length = 24

In [None]:
temporal_data = []
observation_times = []

# Generate sequences from df_scaled_train only
for start in range(len(df_scaled_train) - sequence_length + 1):
    sequence = df_scaled_train.iloc[start:start + sequence_length].reset_index(drop=True)
    temporal_data.append(sequence)
    observation_times.append(list(range(sequence_length)))  # relative time within the window

dummy_outcome = pd.DataFrame(np.zeros(len(temporal_data)), columns=["outcome"])

loader = TimeSeriesDataLoader(
    temporal_data=temporal_data,
    observation_times=observation_times,
    static_data=None,
    outcome=dummy_outcome,
)

# Print the loader info
print(f"TimeSeriesDataLoader created with {len(temporal_data)} sequences")

In [4]:
temporal_data_test = []
observation_times_test = []

# Generate sequences from df_scaled_test only
for start in range(len(df_scaled_test) - sequence_length + 1):
    sequence = df_scaled_test.iloc[start:start + sequence_length].reset_index(drop=True)
    temporal_data_test.append(sequence)
    observation_times_test.append(list(range(sequence_length)))  # relative time within the window

# Dummy outcome for TimeGAN (can be used in DataLoader)
dummy_outcome = pd.DataFrame(np.zeros(len(temporal_data_test)), columns=["outcome"])

# Create DataLoader for TimeGAN
loader_test = TimeSeriesDataLoader(
    temporal_data=temporal_data_test,
    observation_times=observation_times_test,
    static_data=None,
    outcome=dummy_outcome,
)

# Print the loader info
print(f"TimeSeriesDataLoader TEST SET created with {len(temporal_data_test)} sequences")

TimeSeriesDataLoader TEST SET created with 1477 sequences


In [None]:
print(len(df_scaled_train))  # Check the length of the dataframe
print(loader.dataframe())

3500
       seq_id  seq_time_id  seq_temporal_HUFL  seq_temporal_HULL  \
0           0            0          -0.144002          -0.872259   
1           0            1          -0.285107          -0.086752   
2           0            2          -0.073391           0.031162   
3           0            3          -0.120465          -0.322580   
4           0            4           0.193244          -0.047447   
...       ...          ...                ...                ...   
83443    3476           19           0.820780          -0.440494   
83444    3476           20           0.938348          -0.440494   
83445    3476           21           0.930502           0.109772   
83446    3476           22           1.252174           0.424209   
83447    3476           23           0.577565          -0.126057   

       seq_temporal_LUFL  seq_temporal_LULL  seq_temporal_MUFL  \
0              -0.437390           0.611581          -0.114356   
1              -0.437390           1.116499   

All available hyperparameters are listed below:

model = plugin_timegan.TimeGAN( n_iter=500, generator_n_layers_hidden=3, generator_n_units_hidden=100, generator_nonlin="relu", generator_dropout=0.1, discriminator_n_layers_hidden=3, discriminator_n_units_hidden=100, discriminator_nonlin="leaky_relu", discriminator_dropout=0.1, discriminator_n_iter=2, lr=0.0002, weight_decay=0.0001, batch_size=200, encoder_max_clusters=10, mode="LSTM", gamma_penalty=10.0, moments_penalty=10.0, embedding_penalty=5.0 )

In [None]:
syn_model = Plugins().get("timevae")

[2025-04-18T04:16:02.646811+0000][1362][CRITICAL] module disabled: /usr/local/lib/python3.11/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2025-04-18T04:16:02.646811+0000][1362][CRITICAL] module disabled: /usr/local/lib/python3.11/dist-packages/synthcity/plugins/generic/plugin_goggle.py


In [None]:
# --- Print all parameters of initialized model ---
for attr in dir(syn_model):
    if not attr.startswith("_") and not callable(getattr(syn_model, attr)):
        print(f"{attr}: {getattr(syn_model, attr)}")

batch_size: 64
class_name: TimeVAEPlugin
clipping_value: 0
compress_dataset: False
decoder_batch_norm: False
decoder_dropout: 0.01
decoder_n_layers_hidden: 2
decoder_n_units_hidden: 150
decoder_nonlin: leaky_relu
decoder_nonlin_out_continuous: tanh
decoder_nonlin_out_discrete: softmax
decoder_residual: True
device: cuda
embedding_penalty: 10
encoder: None
encoder_batch_norm: False
encoder_dropout: 0.1
encoder_max_clusters: 20
encoder_n_layers_hidden: 3
encoder_n_units_hidden: 300
encoder_nonlin: leaky_relu
expecting_conditional: False
fitted: False
gamma_penalty: 1
lr: 0.001
mode: LSTM
module_name: synthcity.plugins.time_series.plugin_timevae
module_relative_path: ../time_series/plugin_timevae.py
moments_penalty: 100
n_iter: 1000
n_iter_print: 10
outcome_encoder: TabularEncoder(cat_encoder_params={'handle_unknown': 'ignore',
                                   'sparse_output': False},
               categorical_encoder='onehot',
               cont_encoder_params={'n_components': 20},
 

## fitting the model

In [None]:
print(loader.shape)
# Train the model
syn_model.fit(loader)

<synthcity.plugins.time_series.plugin_timevae.TimeVAEPlugin at 0x79ed2f4119d0>

In [None]:
save_to_file('/content/drive/Shareddrives/sp_env/saved_models/VAE_Electricity.pkl', syn_model)

In [None]:
# Generate Synthetic Data
n_samples = len(temporal_data)
syn_data = syn_model.generate(count=n_samples)
print(syn_data.shape)

(83448, 10)


In [None]:
# --- Save with automated format ---
import datetime
import os
# Get the current date and time
now = datetime.datetime.now()
timestamp = now.strftime("%m%d%y-%H%M%S")  # MMDDYY-HHMMSS format

# Define the base directory
base_dir = "/content/drive/Shareddrives/sp_env/synthetic_datasets/TimeVAE/electricity"  #CHANGE THIS
if not os.path.exists(base_dir):
    os.makedirs(base_dir)

# Construct the filename
model_name = type(syn_model).__name__.lower() # Get model name dynamically
filename = f"{timestamp}-{model_name}-n_3000.csv"
filepath = os.path.join(base_dir, filename)

# Save the data
df_syn = syn_data.dataframe()
df_syn.to_csv(filepath, index=False)

print(f"Synthetic data saved to: {filepath}")

Synthetic data saved to: /content/drive/Shareddrives/sp_env/synthetic_datasets/TimeVAE/electricity/041825-045510-timevaeplugin-n_3000.csv


# Evaluation

## Prerequisites

In [5]:
syn_data = pd.read_csv('/content/drive/Shareddrives/sp_env/synthetic_datasets/TimeVAE/electricity/050125-080355-timevaeplugin-n_3000.csv')

In [6]:
selected_columns = ['seq_temporal_HUFL', 'seq_temporal_HULL', 'seq_temporal_LUFL', 'seq_temporal_LULL', 'seq_temporal_MUFL', 'seq_temporal_MULL', 'seq_temporal_OT']

real_data = loader_test.dataframe()[selected_columns].to_numpy()
synthetic_data = syn_data[selected_columns].to_numpy()

In [7]:
print(real_data, "\n ------------------------------------------------------- \n", synthetic_data)
print(type(real_data),type(synthetic_data))
print(real_data.shape,synthetic_data.shape)

[[0.64238779 0.32967478 0.42980559 ... 0.67287374 0.33964623 0.67070109]
 [0.31229163 0.50270358 0.36846652 ... 0.32249713 0.55234947 0.69878968]
 [0.54529643 0.36752483 0.42102231 ... 0.56598494 0.38091834 0.75502396]
 ...
 [0.56471953 0.49729644 0.53506119 ... 0.56506958 0.4952653  0.79921214]
 [0.72005606 0.49729644 0.52627788 ... 0.72954808 0.4952653  0.77106644]
 [0.74109776 0.47566783 0.53506119 ... 0.74814312 0.44765052 0.75902034]] 
 ------------------------------------------------------- 
 [[0.30760782 0.57658813 0.27270248 ... 0.39191547 0.54298586 0.26333621]
 [0.67097521 0.20852661 0.18985192 ... 0.82835083 0.16442728 0.50736484]
 [0.86648357 0.57658813 0.44552911 ... 0.22188361 0.22552743 0.21296792]
 ...
 [0.7292462  0.56399562 0.55672249 ... 0.23446279 0.26941127 0.42007934]
 [0.12220015 0.50859989 0.63483016 ... 0.75053512 0.53313153 0.49799705]
 [0.22886082 0.32468576 0.33904982 ... 0.39191547 0.49962584 0.21296792]]
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(354

## Generate distance metrics

### Helper Functions

In [8]:
from scipy.stats import wasserstein_distance, entropy
import numpy as np

def compute_wasserstein(real_data, synthetic_data, selected_columns):
    """
    Computes Wasserstein Distance between real and synthetic time-series data.

    """

    # Ensure both datasets have the same number of samples
    min_length = min(len(real_data), len(synthetic_data))
    real_trimmed = real_data[:min_length]  # Keep original order (no random sampling)
    synthetic_trimmed = synthetic_data[:min_length]  # Match size
    print(real_trimmed.shape,synthetic_trimmed.shape)

    wasserstein_results = {}

    # Compute Wasserstein Distance for each feature
    for i, col in enumerate(selected_columns):
        w_dist = wasserstein_distance(real_trimmed[:, i], synthetic_trimmed[:, i])
        wasserstein_results[col] = w_dist
        print(f"{w_dist}")

    return wasserstein_results

def compute_kl_divergence(real_data, synthetic_data, selected_columns, bins=50):
    """
    Computes KL Divergence between real and synthetic time-series data.

    """

    # Ensure both datasets have the same number of samples
    min_length = min(len(real_data), len(synthetic_data))
    real_trimmed = real_data[:min_length]  # Keep original order
    synthetic_trimmed = synthetic_data[:min_length]  # Match size

    kl_results = {}

    for i, col in enumerate(selected_columns):
        # Compute histogram-based probability distributions
        real_hist, _ = np.histogram(real_trimmed[:, i], bins=bins, density=True)
        synth_hist, _ = np.histogram(synthetic_trimmed[:, i], bins=bins, density=True)

        # Avoid zero probabilities (KL Divergence is undefined for zero values)
        real_hist += 1e-10
        synth_hist += 1e-10

        # Compute KL Divergence
        kl_div = entropy(real_hist, synth_hist)
        kl_results[col] = kl_div
        print(f"{kl_div}")

    return kl_results

### Generate Metrics

In [9]:
# Assuming df_scaled is the DataFrame containing your scaled ETD data

# Compute Wasserstein Distance
wasserstein_results = compute_wasserstein(real_data, synthetic_data, selected_columns)
print("Wasserstein Distance Results:")
print(wasserstein_results)

# Compute KL Divergence
kl_results = compute_kl_divergence(real_data, synthetic_data, selected_columns)
print("KL Divergence Results:")
print(kl_results)

(35448, 7) (35448, 7)
0.058734213769422866
0.05437147388507411
0.03090487289559117
0.15800403282609599
0.09197090101166529
0.04352354669699914
0.275193848719602
Wasserstein Distance Results:
{'seq_temporal_HUFL': 0.058734213769422866, 'seq_temporal_HULL': 0.05437147388507411, 'seq_temporal_LUFL': 0.03090487289559117, 'seq_temporal_LULL': 0.15800403282609599, 'seq_temporal_MUFL': 0.09197090101166529, 'seq_temporal_MULL': 0.04352354669699914, 'seq_temporal_OT': 0.275193848719602}
12.919072711301745
14.695588215919834
14.20059341374937
11.832415828098034
13.931592884528987
13.462827448375341
14.32022259102309
KL Divergence Results:
{'seq_temporal_HUFL': 12.919072711301745, 'seq_temporal_HULL': 14.695588215919834, 'seq_temporal_LUFL': 14.20059341374937, 'seq_temporal_LULL': 11.832415828098034, 'seq_temporal_MUFL': 13.931592884528987, 'seq_temporal_MULL': 13.462827448375341, 'seq_temporal_OT': 14.32022259102309}


# LSTM downstream

In [10]:
real_data = loader_test.dataframe()
df_synth = pd.read_csv('/content/drive/Shareddrives/sp_env/synthetic_datasets/TimeVAE/electricity/050125-080355-timevaeplugin-n_3000.csv')

# drop unwanted column
real_data = real_data.drop(columns=["seq_id", "seq_time_id", "seq_out_outcome"], errors="ignore")
df_synth = df_synth.drop(columns=["seq_id", "seq_time_id", "seq_out_outcome"], errors="ignore")

print(f"real_data: {real_data.shape}, synthetic_data: {df_synth.shape}")

real_data: (35448, 7), synthetic_data: (83448, 7)


In [11]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [12]:
# Convert to tensors (float32 for PyTorch)
data_real = torch.tensor(real_data.values, dtype=torch.float32)
data_synth = torch.tensor(df_synth.values, dtype=torch.float32)

#  Sequence builder
def make_sequences(data, seq_len):
    X, y = [], []
    for i in range(len(data) - seq_len):
        X.append(data[i:i+seq_len])
        y.append(data[i+seq_len])
    return torch.stack(X), torch.stack(y)

SEQ_LEN = sequence_length

# Sequences for synthetic (train)
X_train, y_train = make_sequences(data_synth, SEQ_LEN)
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)

# Sequences for real (test)
X_test, y_test = make_sequences(data_real, SEQ_LEN)

In [13]:
# ─── Model Definition ──────────────────────────────────────
class ShallowLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=64):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, input_size)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)  # hn shape: (1, batch, hidden_size)
        out = self.linear(hn.squeeze(0))  # squeeze to (batch, hidden_size)
        return out


# ─── Model Init ─────────────────────────────────────────────
model = ShallowLSTM(input_size=X_train.shape[2], hidden_size=64)

# ─── Optimizer & Loss ───────────────────────────────────────
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

In [14]:
# ─── Training ───────────────────────────────────────────────
EPOCHS = 50
for epoch in range(1, EPOCHS + 1):
    model.train()
    for xb, yb in train_loader:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # if epoch % 10 == 0 or epoch == 1:
    print(f"Epoch {epoch}: Train MSE = {loss.item():.6f}")

Epoch 1: Train MSE = 0.045914
Epoch 2: Train MSE = 0.035705
Epoch 3: Train MSE = 0.043462
Epoch 4: Train MSE = 0.046102
Epoch 5: Train MSE = 0.043530
Epoch 6: Train MSE = 0.044796
Epoch 7: Train MSE = 0.046171
Epoch 8: Train MSE = 0.042200
Epoch 9: Train MSE = 0.041579
Epoch 10: Train MSE = 0.036881
Epoch 11: Train MSE = 0.047748
Epoch 12: Train MSE = 0.047610
Epoch 13: Train MSE = 0.047467
Epoch 14: Train MSE = 0.043663
Epoch 15: Train MSE = 0.041586
Epoch 16: Train MSE = 0.044556
Epoch 17: Train MSE = 0.040909
Epoch 18: Train MSE = 0.042399
Epoch 19: Train MSE = 0.045862
Epoch 20: Train MSE = 0.042875
Epoch 21: Train MSE = 0.042828
Epoch 22: Train MSE = 0.040257
Epoch 23: Train MSE = 0.046970
Epoch 24: Train MSE = 0.040602
Epoch 25: Train MSE = 0.040733
Epoch 26: Train MSE = 0.043052
Epoch 27: Train MSE = 0.046107
Epoch 28: Train MSE = 0.045542
Epoch 29: Train MSE = 0.047378
Epoch 30: Train MSE = 0.037825
Epoch 31: Train MSE = 0.040177
Epoch 32: Train MSE = 0.045568
Epoch 33: Train M

In [15]:
MODEL_SAVE_PATH = '/content/drive/Shareddrives/sp_env/saved_models/LSTM/tstr_VAE_electricity.pth'

# Save only the model's learned parameters (state_dict)
torch.save(model.state_dict(), MODEL_SAVE_PATH)

print(f"Model saved to: {MODEL_SAVE_PATH}")

Model saved to: /content/drive/Shareddrives/sp_env/saved_models/LSTM/tstr_VAE_electricity.pth


In [16]:
#@title ✧.* model evaluation ✧.*
model.eval()
with torch.no_grad():
    preds = model(X_test)
    test_mse = loss_fn(preds, y_test).item()
    test_mae = mean_absolute_error(y_test.numpy(), preds.numpy())

    print(f"Test MSE: {test_mse:.6f}")
    print(f"Test MAE: {test_mae:.6f}")

Test MSE: 0.042392
Test MAE: 0.170565


# Bootstrapping Sample

In [17]:
!pip install tsbootstrap



In [18]:
len(loader_test)

35448

In [19]:
from tsbootstrap import MovingBlockBootstrap
import numpy as np

C_CONSTANT = 1
n = len(loader_test)
calculated_block_length = int(np.round(C_CONSTANT * (n**(1/3))))
print(calculated_block_length)

bootstrap_configs = {
    "weather": {"block_length": calculated_block_length, "n_bootstraps": 15, "rng": 42},       # 6-hour pattern (10-min interval)
    "electricity": {"block_length": calculated_block_length, "n_bootstraps": 15, "rng": 42},   # 1-day pattern (hourly)
    "exchange": {"block_length": calculated_block_length, "n_bootstraps": 15, "rng": 42},      # 1-month pattern (daily)
}

# Example for weather
dataset_name = "electricity"
config = bootstrap_configs[dataset_name]

real_test_array = real_data # shape (N, features)
mbb = MovingBlockBootstrap(
    n_bootstraps=config["n_bootstraps"],
    rng=config["rng"],
    block_length=config["block_length"]
)
boot_samples = mbb.bootstrap(real_test_array, return_indices=False)


33


In [20]:
model.load_state_dict(torch.load(MODEL_SAVE_PATH))

<All keys matched successfully>

In [None]:
bootstrap_results = []

for b_idx, boot_real in enumerate(boot_samples):
    # 1. Match the synthetic data size
    syn_trimmed = synthetic_data[:len(boot_real)]

    # 2. Fidelity metrics
    wasserstein = compute_wasserstein(boot_real, syn_trimmed, selected_columns)
    kl = compute_kl_divergence(boot_real, syn_trimmed, selected_columns)

    # 3. Utility metrics
    # Preprocess this bootstrap sample for LSTM (as you do with real_data)
    boot_tensor = torch.tensor(boot_real, dtype=torch.float32)
    Xb_test, yb_test = make_sequences(boot_tensor, SEQ_LEN)

    model.eval()
    with torch.no_grad():
        preds = model(Xb_test)
        mse = mean_squared_error(yb_test.numpy(), preds.numpy())
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(yb_test.numpy(), preds.numpy())

    # 4. Store results
    bootstrap_results.append({
        'bootstrap': b_idx,
        'wasserstein': np.mean(list(wasserstein.values())),
        'kl': np.mean(list(kl.values())),
        'rmse': rmse,
        'mae': mae
    })

(35448, 7) (35448, 7)
0.05529728018845923
0.05430524160959064
0.030452205368524367
0.16116423508964609
0.08826626519400188
0.042960643485044164
0.2759711920207977
13.038326810110997
14.713579206776958
14.195222796077719
11.784235036321466
14.022001236787123
13.256867363354935
14.25434059928865
(35448, 7) (35448, 7)
0.055053147645018806
0.0506483605938061
0.030647277832964805
0.15399998982705912
0.08800851081530889
0.04229399923280342
0.2699039740665348
12.997171575838031
14.659077548153169
14.176286800407732
11.66321913113126
13.801527873053654
13.288486114379403
14.30577152735018
(35448, 7) (35448, 7)
0.057823317781668014
0.05269647031331238
0.031151407249197387
0.15899085475685668
0.09069026562581499
0.04215539668391499
0.2758364310034349
12.916862387140029
14.71137646762695
14.239391178477579
11.838745456581814
14.015294370532526
13.547660325019036
14.337389912585358
(35448, 7) (35448, 7)
0.060018413604816856
0.05412596743618572
0.030559088165501697
0.15951986993457257
0.09326953288

In [None]:
print(bootstrap_results)

# Assuming bootstrap_results is your list of dicts
df_results = pd.DataFrame(bootstrap_results)

df_results['Dataset'] = 'electricity'
df_results['Model'] = 'TimeVAE'

df_results

[{'bootstrap': 0, 'wasserstein': 0.07323141194763165, 'kl': 12.846438609554097, 'rmse': 0.17522812, 'mae': 0.14308257}, {'bootstrap': 1, 'wasserstein': 0.07352712310986735, 'kl': 12.881357757419664, 'rmse': 0.1744337, 'mae': 0.14231618}, {'bootstrap': 2, 'wasserstein': 0.07472629509668247, 'kl': 12.845046054470235, 'rmse': 0.17401692, 'mae': 0.14200984}, {'bootstrap': 3, 'wasserstein': 0.07213636150413183, 'kl': 12.908230518238838, 'rmse': 0.17593919, 'mae': 0.14358655}, {'bootstrap': 4, 'wasserstein': 0.07254918520736926, 'kl': 12.90596188845117, 'rmse': 0.17640515, 'mae': 0.14459266}, {'bootstrap': 5, 'wasserstein': 0.07164750332783064, 'kl': 12.871000882557556, 'rmse': 0.17530641, 'mae': 0.1432686}, {'bootstrap': 6, 'wasserstein': 0.07365491968045372, 'kl': 12.82351955409776, 'rmse': 0.17456543, 'mae': 0.14284965}, {'bootstrap': 7, 'wasserstein': 0.07173975057797534, 'kl': 12.883252847040266, 'rmse': 0.17664504, 'mae': 0.14436296}, {'bootstrap': 8, 'wasserstein': 0.07155771479110927

Unnamed: 0,bootstrap,wasserstein,kl,rmse,mae,Dataset,Model
0,0,0.073231,12.846439,0.175228,0.143083,electricity,TimeVAE
1,1,0.073527,12.881358,0.174434,0.142316,electricity,TimeVAE
2,2,0.074726,12.845046,0.174017,0.14201,electricity,TimeVAE
3,3,0.072136,12.908231,0.175939,0.143587,electricity,TimeVAE
4,4,0.072549,12.905962,0.176405,0.144593,electricity,TimeVAE
5,5,0.071648,12.871001,0.175306,0.143269,electricity,TimeVAE
6,6,0.073655,12.82352,0.174565,0.14285,electricity,TimeVAE
7,7,0.07174,12.883253,0.176645,0.144363,electricity,TimeVAE
8,8,0.071558,12.913056,0.175753,0.143831,electricity,TimeVAE
9,9,0.072526,12.873824,0.176269,0.144065,electricity,TimeVAE


In [None]:
summary_row = {
    'Dataset': 'electricity',
    'Model': 'TimeVAE',
    'Wasserstein': df_results['wasserstein'].mean(),
    'KL': df_results['kl'].mean(),
    'RMSE': df_results['rmse'].mean(),
    'MAE': df_results['mae'].mean()
}

df_summary = pd.DataFrame([summary_row])
print(df_summary)

       Dataset    Model  Wasserstein         KL      RMSE       MAE
0  electricity  TimeVAE     0.072608  12.873615  0.175492  0.143382
