<a href="https://colab.research.google.com/github/pmxfa/sp-shapely/blob/main/sp_timegan_electricity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
!pip install synthcity

Collecting synthcity
  Using cached synthcity-0.2.12-py3-none-any.whl.metadata (37 kB)
Collecting torch<2.3,>=2.1 (from synthcity)
  Using cached torch-2.2.2-cp311-cp311-manylinux1_x86_64.whl.metadata (25 kB)
Collecting nflows>=0.14 (from synthcity)
  Using cached nflows-0.14.tar.gz (45 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting lifelines<0.30.0,>=0.29.0 (from synthcity)
  Using cached lifelines-0.29.0-py3-none-any.whl.metadata (3.2 kB)
Collecting opacus>=1.3 (from synthcity)
  Using cached opacus-1.5.3-py3-none-any.whl.metadata (8.4 kB)
Collecting networkx<3.0,>2.0 (from synthcity)
  Using cached networkx-2.8.8-py3-none-any.whl.metadata (5.1 kB)
Collecting decaf-synthetic-data>=0.1.6 (from synthcity)
  Using cached decaf_synthetic_data-0.1.6-py3-none-any.whl.metadata (2.5 kB)
Collecting optuna>=3.1 (from synthcity)
  Using cached optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting loguru (from synthcity)
  Using cached loguru-0.7.3-py3-none-any.whl.metad

# Training

In [1]:
from google.colab import drive
drive.mount('/content/drive')

import sys
import warnings
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

import synthcity.logger as log
from synthcity.plugins import Plugins
from synthcity.plugins.core.dataloader import TimeSeriesDataLoader

log.add(sink=sys.stderr, level="INFO")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
file_path = "/content/drive/Shareddrives/sp_env/datasets/Electricity Transformer Dataset (ETDataset)/ETTh1.csv"

df = pd.read_csv(file_path)
print(df.head())
print(df.info())
print(df.isnull().sum())

                  date   HUFL   HULL   MUFL   MULL   LUFL   LULL         OT
0  2016-07-01 00:00:00  5.827  2.009  1.599  0.462  4.203  1.340  30.531000
1  2016-07-01 01:00:00  5.693  2.076  1.492  0.426  4.142  1.371  27.787001
2  2016-07-01 02:00:00  5.157  1.741  1.279  0.355  3.777  1.218  27.787001
3  2016-07-01 03:00:00  5.090  1.942  1.279  0.391  3.807  1.279  25.044001
4  2016-07-01 04:00:00  5.358  1.942  1.492  0.462  3.868  1.279  21.948000
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17420 entries, 0 to 17419
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    17420 non-null  object 
 1   HUFL    17420 non-null  float64
 2   HULL    17420 non-null  float64
 3   MUFL    17420 non-null  float64
 4   MULL    17420 non-null  float64
 5   LUFL    17420 non-null  float64
 6   LULL    17420 non-null  float64
 7   OT      17420 non-null  float64
dtypes: float64(7), object(1)
memory usage: 1.1+ MB
None
date    0

In [3]:
# Convert 'date' to datetime, set as index, and sort
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)
df.sort_index(inplace=True)

# Keep the latest 5000 rows
df_latest = df.tail(5000)

# Train-test split: 70% for training (for TimeGAN), 30% for testing (TSTR)
train_size = int(0.7 * len(df_latest))
df_train = df_latest.iloc[:train_size]
df_test = df_latest.iloc[train_size:]  # use later for LSTM-TSTR

# Normalize the data
scaler = MinMaxScaler()
scaled_train = scaler.fit_transform(df_train)
df_scaled_train = pd.DataFrame(scaled_train, columns=df_train.columns, index=df_train.index)
scaled_test = scaler.transform(df_test)
df_scaled_test = pd.DataFrame(scaled_test, columns=df_test.columns, index=df_test.index)

# Sequence length for time-series data (dataset = hourly; 24 hours)
sequence_length = 24
# temporal_data = []
# observation_times = []

# # Generate sequences from df_scaled_train only
# for start in range(len(df_scaled_train) - sequence_length + 1):
#     sequence = df_scaled_train.iloc[start:start + sequence_length].reset_index(drop=True)
#     temporal_data.append(sequence)
#     observation_times.append(list(range(sequence_length)))  # relative time within the window

# # Dummy outcome for TimeGAN (can be used in DataLoader)
# dummy_outcome = pd.DataFrame(np.zeros(len(temporal_data)), columns=["outcome"])

# # Create DataLoader for TimeGAN
# loader = TimeSeriesDataLoader(
#     temporal_data=temporal_data,
#     observation_times=observation_times,
#     static_data=None,
#     outcome=dummy_outcome,
# )

# # Print the loader info
# print(f"TimeSeriesDataLoader created with {len(temporal_data)} sequences")


In [4]:
temporal_data_test = []
observation_times_test = []

# Generate sequences from df_scaled_test only
for start in range(len(df_scaled_test) - sequence_length + 1):
    sequence = df_scaled_test.iloc[start:start + sequence_length].reset_index(drop=True)
    temporal_data_test.append(sequence)
    observation_times_test.append(list(range(sequence_length)))  # relative time within the window

# Dummy outcome for TimeGAN (can be used in DataLoader)
dummy_outcome = pd.DataFrame(np.zeros(len(temporal_data_test)), columns=["outcome"])

# Create DataLoader for TimeGAN
loader_test = TimeSeriesDataLoader(
    temporal_data=temporal_data_test,
    observation_times=observation_times_test,
    static_data=None,
    outcome=dummy_outcome,
)

# Print the loader info
print(f"TimeSeriesDataLoader TEST SET created with {len(temporal_data_test)} sequences")

TimeSeriesDataLoader TEST SET created with 1477 sequences


In [None]:
print(len(df_train))
print(loader.dataframe())

In [None]:
hparams = {
          "mode": "LSTM", # default mode = RNN;
          "dataloader_sampling_strategy": "none" # default = imbalanced_time_censoring
}

# Load TimeGAN with custom parameters
syn_model = Plugins().get("timegan", **hparams)

[2025-05-07T10:55:46.427394+0000][1158][CRITICAL] module disabled: /usr/local/lib/python3.11/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2025-05-07T10:55:46.427394+0000][1158][CRITICAL] module disabled: /usr/local/lib/python3.11/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2025-05-07T10:55:46.427394+0000][1158][CRITICAL] module disabled: /usr/local/lib/python3.11/dist-packages/synthcity/plugins/generic/plugin_goggle.py


In [None]:
# Print all parameters of initialized model
for attr in dir(syn_model):
    if not attr.startswith("_") and not callable(getattr(syn_model, attr)):
        print(f"{attr}: {getattr(syn_model, attr)}")

batch_size: 64
class_name: TimeGANPlugin
clipping_value: 0
compress_dataset: False
dataloader_sampling_strategy: none
device: cuda
discriminator_batch_norm: False
discriminator_dropout: 0.1
discriminator_loss: None
discriminator_lr: 0.001
discriminator_n_iter: 1
discriminator_n_layers_hidden: 3
discriminator_n_units_hidden: 300
discriminator_nonlin: leaky_relu
discriminator_weight_decay: 0.001
embedding_penalty: 10
encoder: None
encoder_max_clusters: 20
expecting_conditional: False
fitted: False
gamma_penalty: 1
generator_batch_norm: False
generator_dropout: 0.01
generator_loss: None
generator_lr: 0.001
generator_n_layers_hidden: 2
generator_n_units_hidden: 150
generator_nonlin: leaky_relu
generator_nonlin_out_continuous: tanh
generator_nonlin_out_discrete: softmax
generator_residual: True
generator_weight_decay: 0.001
mode: LSTM
module_name: synthcity.plugins.time_series.plugin_timegan
module_relative_path: ../time_series/plugin_timegan.py
moments_penalty: 100
n_iter: 1000
n_iter_prin

## fitting the model

In [None]:
print(loader.shape)

(83448, 10)


In [None]:
#  Train the model
syn_model.fit(loader)

100%|██████████| 1000/1000 [2:09:12<00:00,  7.75s/it]


<synthcity.plugins.time_series.plugin_timegan.TimeGANPlugin at 0x7ed4d27ecd50>

In [None]:
saved_model = syn_model.save()

In [None]:
from synthcity.utils.serialization import save_to_file, load_from_file

# Save model to drive
# save_to_file('/content/drive/Shareddrives/sp_env/test_model.pkl', syn_model)
save_to_file('/content/drive/Shareddrives/sp_env/saved_models/GAN_Electricity.pkl', syn_model)

# Load the model
# loaded_model = load_from_file('/content/drive/Shareddrives/sp_env/test_model.pkl')

In [None]:
n_samples = len(temporal_data)
print("n_samples:", n_samples)
syn_data = syn_model.generate(count=n_samples)
print(syn_data.shape)

n_samples: 3477
(43421, 10)


In [None]:
# Save with automated format
import datetime
import os
# Get the current date and time
now = datetime.datetime.now()
timestamp = now.strftime("%m%d%y-%H%M%S")  # MMDDYY-HHMMSS format

# Define the base directory
base_dir = "/content/drive/Shareddrives/sp_env/synthetic_datasets/TimeGAN/electricity"  #CHANGE THIS
if not os.path.exists(base_dir):
    os.makedirs(base_dir)

# Construct the filename
model_name = type(syn_model).__name__.lower() # Get model name dynamically
filename = f"{timestamp}-{model_name}-n_3000.csv"
filepath = os.path.join(base_dir, filename)

# Save the data
df_syn = syn_data.dataframe()
df_syn.to_csv(filepath, index=False)

print(f"Synthetic data saved to: {filepath}")

Synthetic data saved to: /content/drive/Shareddrives/sp_env/synthetic_datasets/TimeGAN/electricity/050725-132812-timeganplugin-n_3000.csv


# Evaluation

## Prerequisites

In [6]:
syn_data = pd.read_csv('/content/drive/Shareddrives/sp_env/synthetic_datasets/TimeGAN/electricity/050725-132812-timeganplugin-n_3000.csv')

In [7]:
# Define selected columns explicitly
selected_columns = ['seq_temporal_HUFL', 'seq_temporal_HULL', 'seq_temporal_LUFL', 'seq_temporal_LULL', 'seq_temporal_MUFL', 'seq_temporal_MULL', 'seq_temporal_OT']

# Ensure real_data and synthetic_data only contain the selected columns
real_data = loader_test.dataframe()[selected_columns].to_numpy()
synthetic_data = syn_data[selected_columns].to_numpy()

In [8]:
#  Check datasets

print(real_data, "\n ------------------------------------------------------- \n", synthetic_data)
print(type(real_data),type(synthetic_data))
print(real_data.shape,synthetic_data.shape)

""" TODO
[] add adjusting off dataset to fit min length here
[] remove min length stuff in helper funcs
"""

[[0.64238779 0.32967478 0.42980559 ... 0.67287374 0.33964623 0.67070109]
 [0.31229163 0.50270358 0.36846652 ... 0.32249713 0.55234947 0.69878968]
 [0.54529643 0.36752483 0.42102231 ... 0.56598494 0.38091834 0.75502396]
 ...
 [0.56471953 0.49729644 0.53506119 ... 0.56506958 0.4952653  0.79921214]
 [0.72005606 0.49729644 0.52627788 ... 0.72954808 0.4952653  0.77106644]
 [0.74109776 0.47566783 0.53506119 ... 0.74814312 0.44765052 0.75902034]] 
 ------------------------------------------------------- 
 [[0.8511261  0.5072802  0.33854351 ... 0.86287211 0.37202017 0.50054281]
 [0.85119168 0.5072618  0.65129147 ... 0.86286684 0.37195367 0.75588455]
 [0.80797667 0.28399383 0.38214453 ... 0.86282263 0.33302839 0.50060487]
 ...
 [0.67343941 0.56629246 0.43043925 ... 0.83052238 0.37179222 0.46109215]
 [0.78868023 0.44475754 0.38193633 ... 0.79199183 0.54619039 0.50064705]
 [0.85097307 0.2835606  0.65116624 ... 0.75196361 0.37169687 0.37934677]]
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(354

' TODO\n[] add adjusting off dataset to fit min length here\n[] remove min length stuff in helper funcs\n'

## Generate distance metrics

### Helper Functions

In [10]:
from scipy.stats import wasserstein_distance, entropy
import numpy as np

def compute_wasserstein(real_data, synthetic_data, selected_columns):
    """
    Computes Wasserstein Distance between real and synthetic time-series data.

    """

    # Ensure both datasets have the same number of samples
    min_length = min(len(real_data), len(synthetic_data))
    real_trimmed = real_data[:min_length]  # Keep original order (no random sampling)
    synthetic_trimmed = synthetic_data[:min_length]  # Match size
    print(real_trimmed.shape,synthetic_trimmed.shape)

    wasserstein_results = {}

    # Compute Wasserstein Distance for each feature
    for i, col in enumerate(selected_columns):
        w_dist = wasserstein_distance(real_trimmed[:, i], synthetic_trimmed[:, i])
        wasserstein_results[col] = w_dist
        print(f"{w_dist}")

    return wasserstein_results

def compute_kl_divergence(real_data, synthetic_data, selected_columns, bins=50):
    """
    Computes KL Divergence between real and synthetic time-series data.

    """

    # Ensure both datasets have the same number of samples
    min_length = min(len(real_data), len(synthetic_data))
    real_trimmed = real_data[:min_length]  # Keep original order
    synthetic_trimmed = synthetic_data[:min_length]  # Match size

    kl_results = {}

    for i, col in enumerate(selected_columns):
        # Compute histogram-based probability distributions
        real_hist, _ = np.histogram(real_trimmed[:, i], bins=bins, density=True)
        synth_hist, _ = np.histogram(synthetic_trimmed[:, i], bins=bins, density=True)

        # Avoid zero probabilities (KL Divergence is undefined for zero values)
        real_hist += 1e-10
        synth_hist += 1e-10

        # Compute KL Divergence
        kl_div = entropy(real_hist, synth_hist)
        kl_results[col] = kl_div
        print(f"{kl_div}")

    return kl_results

### Generate Metrics

In [12]:
# Compute Wasserstein Distance
wasserstein_results = compute_wasserstein(real_data, synthetic_data, selected_columns)
print("Wasserstein Distance Results:")
print(wasserstein_results)

# Compute KL Divergence
kl_results = compute_kl_divergence(real_data, synthetic_data, selected_columns)
print("KL Divergence Results:")
print(kl_results)

(35448, 7) (35448, 7)
0.12589359866398342
0.08450584982597788
0.09317955417348268
0.2239760852634426
0.16200768196480073
0.09962678698033475
0.33575507662121135
Wasserstein Distance Results:
{'seq_temporal_HUFL': 0.12589359866398342, 'seq_temporal_HULL': 0.08450584982597788, 'seq_temporal_LUFL': 0.09317955417348268, 'seq_temporal_LULL': 0.2239760852634426, 'seq_temporal_MUFL': 0.16200768196480073, 'seq_temporal_MULL': 0.09962678698033475, 'seq_temporal_OT': 0.33575507662121135}
11.436516277561278
14.463556430780402
13.979544374637179
11.79633753592787
12.691260723979555
15.10601419655953
13.45767585080329
KL Divergence Results:
{'seq_temporal_HUFL': 11.436516277561278, 'seq_temporal_HULL': 14.463556430780402, 'seq_temporal_LUFL': 13.979544374637179, 'seq_temporal_LULL': 11.79633753592787, 'seq_temporal_MUFL': 12.691260723979555, 'seq_temporal_MULL': 15.10601419655953, 'seq_temporal_OT': 13.45767585080329}


# LSTM downstream

In [15]:
filepath = '/content/drive/Shareddrives/sp_env/synthetic_datasets/TimeGAN/electricity/050725-132812-timeganplugin-n_3000.csv'

In [16]:
real_data = loader_test.dataframe()
df_synth = pd.read_csv(filepath)

# 2. Drop the unwanted column
real_data = real_data.drop(columns=["seq_id", "seq_time_id", "seq_out_outcome"], errors="ignore")
df_synth = df_synth.drop(columns=["seq_id", "seq_time_id", "seq_out_outcome"], errors="ignore")

In [17]:
print(f"real_data: {real_data.shape}, synthetic_data: {df_synth.shape}")

real_data: (35448, 7), synthetic_data: (43421, 7)


In [21]:
#@title ✧.* libraries ✧.*

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

recheck LSTM notebook for

In [23]:
# Convert to tensors (float32 for PyTorch)
data_real = torch.tensor(real_data.values, dtype=torch.float32)
data_synth = torch.tensor(df_synth.values, dtype=torch.float32)

# ──────── Sequence builder ───────────
def make_sequences(data, seq_len):
    X, y = [], []
    for i in range(len(data) - seq_len):
        X.append(data[i:i+seq_len])
        y.append(data[i+seq_len])
    return torch.stack(X), torch.stack(y)

SEQ_LEN = sequence_length

# Sequences for synthetic (train)
X_train, y_train = make_sequences(data_synth, SEQ_LEN)
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)

# Sequences for real (test)
X_test, y_test = make_sequences(data_real, SEQ_LEN)


In [25]:
#@title ✧.* model definition and training ✧.*

# ─── Model Definition ──────────────────────────────────────
class ShallowLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=64):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, input_size)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)  # hn shape: (1, batch, hidden_size)
        out = self.linear(hn.squeeze(0))  # squeeze to (batch, hidden_size)
        return out


# ─── Model Init ─────────────────────────────────────────────
model = ShallowLSTM(input_size=X_train.shape[2], hidden_size=64)

# ─── Optimizer & Loss ───────────────────────────────────────
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)


In [None]:
# ─── Training ───────────────────────────────────────────────
EPOCHS = 50
for epoch in range(1, EPOCHS + 1):
    model.train()
    for xb, yb in train_loader:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # if epoch % 10 == 0 or epoch == 1:
    print(f"Epoch {epoch}: Train MSE = {loss.item():.6f}")

In [25]:
#@title ✧.* model evaluation ✧.*
model.eval()
with torch.no_grad():
    preds = model(X_test)
    test_mse = loss_fn(preds, y_test).item()
    test_mae = mean_absolute_error(y_test.numpy(), preds.numpy())

    print(f"Test MSE: {test_mse:.6f}")
    print(f"Test MAE: {test_mae:.6f}")

Test MSE: 0.060231
Test MAE: 0.188900


# Bootstrapping Sample

In [None]:
!pip install tsbootstrap

In [27]:
from tsbootstrap import MovingBlockBootstrap
import numpy as np

bootstrap_configs = {
    "weather": {"block_length": 36, "n_bootstraps": 15, "rng": 42},       # 6-hour pattern (10-min interval)
    "electricity": {"block_length": 24, "n_bootstraps": 15, "rng": 42},   # 1-day pattern (hourly)
    "exchange": {"block_length": 30, "n_bootstraps": 15, "rng": 42},      # 1-month pattern (daily)
}

# Example for weather
dataset_name = "electricity"
config = bootstrap_configs[dataset_name]

real_test_array = real_data[selected_columns].to_numpy()  # shape (N, features)
mbb = MovingBlockBootstrap(
    n_bootstraps=config["n_bootstraps"],
    rng=config["rng"],
    block_length=config["block_length"]
)
boot_samples = mbb.bootstrap(real_test_array, return_indices=False)


In [28]:
bootstrap_results = []

for b_idx, boot_real in enumerate(boot_samples):
    # 1. Match the synthetic data size
    syn_trimmed = synthetic_data[:len(boot_real)]

    # 2. Fidelity metrics
    wasserstein = compute_wasserstein(boot_real, syn_trimmed, selected_columns)
    kl = compute_kl_divergence(boot_real, syn_trimmed, selected_columns)

    # 3. Utility metrics
    # Preprocess this bootstrap sample for LSTM (as you do with real_data)
    boot_tensor = torch.tensor(boot_real, dtype=torch.float32)
    Xb_test, yb_test = make_sequences(boot_tensor, SEQ_LEN)

    model.eval()
    with torch.no_grad():
        preds = model(Xb_test)
        mse = mean_squared_error(yb_test.numpy(), preds.numpy())
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(yb_test.numpy(), preds.numpy())

    # 4. Store results
    bootstrap_results.append({
        'bootstrap': b_idx,
        'wasserstein': np.mean(list(wasserstein.values())),
        'kl': np.mean(list(kl.values())),
        'rmse': rmse,
        'mae': mae
    })

(35448, 7) (35448, 7)
0.12629808393726177
0.08302169078762894
0.09361613698202868
0.2238750465741284
0.162539357723731
0.09927551451175128
0.3331598464918357
11.375785192605088
14.52953528581811
13.940625518091212
11.746758616034878
12.691615716973075
14.956244906758794
13.442675463586168
(35448, 7) (35448, 7)
0.12781715864971524
0.08402150046521507
0.0931879448335285
0.22318203908110712
0.16405819411914557
0.09914975365824384
0.33240511035641457
11.489385364484672
14.51462260449909
14.017100427140146
11.809124770840935
12.672360318677915
15.097296894552109
13.503077747312618
(35448, 7) (35448, 7)
0.12527220997383734
0.08301268480612087
0.09240877853449549
0.22541876151159568
0.16147681485683688
0.09886341087792563
0.34093146227217613
11.429630782647152
14.375784761557867
13.998061086883268
11.749080918176222
12.7570447312333
15.026733243101349
13.320206844635315
(35448, 7) (35448, 7)
0.12905576479587752
0.08191425280048419
0.09315168129819659
0.2223238856427728
0.1656475798315097
0.09

In [29]:
print(bootstrap_results)

# Assuming bootstrap_results is your list of dicts
df_results = pd.DataFrame(bootstrap_results)

df_results['Dataset'] = 'El'
df_results['Model'] = 'GAN'

df_results

[{'bootstrap': 0, 'wasserstein': 0.16025509671548083, 'kl': 13.240462957123905, 'rmse': 0.6621019, 'mae': 0.6246629}, {'bootstrap': 1, 'wasserstein': 0.16054595730905286, 'kl': 13.300424018215354, 'rmse': 0.6618976, 'mae': 0.62488705}, {'bootstrap': 2, 'wasserstein': 0.16105487469042684, 'kl': 13.236648909747784, 'rmse': 0.663604, 'mae': 0.62608546}, {'bootstrap': 3, 'wasserstein': 0.16140887326218295, 'kl': 13.256149548435065, 'rmse': 0.66230774, 'mae': 0.6244455}, {'bootstrap': 4, 'wasserstein': 0.16008470681210094, 'kl': 13.23941284066011, 'rmse': 0.66376495, 'mae': 0.6271722}, {'bootstrap': 5, 'wasserstein': 0.15917137580358884, 'kl': 13.281383908511364, 'rmse': 0.6635457, 'mae': 0.6273698}, {'bootstrap': 6, 'wasserstein': 0.1613752993918209, 'kl': 13.275141436591166, 'rmse': 0.66338, 'mae': 0.6261959}, {'bootstrap': 7, 'wasserstein': 0.1608017769961006, 'kl': 13.314618492619905, 'rmse': 0.6629921, 'mae': 0.62582284}, {'bootstrap': 8, 'wasserstein': 0.15804143606680718, 'kl': 13.20

Unnamed: 0,bootstrap,wasserstein,kl,rmse,mae,Dataset,Model
0,0,0.160255,13.240463,0.662102,0.624663,El,GAN
1,1,0.160546,13.300424,0.661898,0.624887,El,GAN
2,2,0.161055,13.236649,0.663604,0.626085,El,GAN
3,3,0.161409,13.25615,0.662308,0.624445,El,GAN
4,4,0.160085,13.239413,0.663765,0.627172,El,GAN
5,5,0.159171,13.281384,0.663546,0.62737,El,GAN
6,6,0.161375,13.275141,0.66338,0.626196,El,GAN
7,7,0.160802,13.314618,0.662992,0.625823,El,GAN
8,8,0.158041,13.20339,0.660104,0.622823,El,GAN
9,9,0.161083,13.275978,0.663756,0.627383,El,GAN


In [31]:
summary_row = {
    'Dataset': 'El',
    'Model': 'GAN',
    'Wasserstein': df_results['wasserstein'].mean(),
    'KL': df_results['kl'].mean(),
    'RMSE': df_results['rmse'].mean(),
    'MAE': df_results['mae'].mean()
}

df_summary = pd.DataFrame([summary_row])
print(df_summary)

  Dataset Model  Wasserstein        KL      RMSE       MAE
0      El   GAN     0.160721  13.26968  0.662984  0.625872
