# Metric Presentation and Visualization
## Necessary packages and function calls

- DDPM-TS: Interpretable Diffusion for Time Series Generation
- Metrics: 
    - discriminative_metrics
    - predictive_metrics
    - visualization

In [1]:
import os
# Must be set before TensorFlow is imported further down. Presumably this
# silences TensorFlow's native log output -- confirm for the TF version in use.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import sys
# NOTE(review): '__file__' is a quoted string literal, not the __file__ variable,
# so os.path.dirname() returns '' and the entry appended is the relative path
# '../'. Presumably this lets the `Utils` package imported below be found one
# directory above the kernel's working directory -- confirm the launch directory.
sys.path.append(os.path.join(os.path.dirname('__file__'), '../'))

import warnings
# Suppresses every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

import numpy as np
import tensorflow as tf
# Ask TensorFlow to enable memory growth on each GPU it reports. A RuntimeError
# raised while doing so is printed instead of aborting the notebook.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# Project-local metric helpers (found through the sys.path entry added above).
from Utils.metric_utils import display_scores
from Utils.discriminative_metric import discriminative_score_metrics
from Utils.predictive_metric import predictive_score_metrics

## Data Loading ETTh Morning Peak

Load the original and generated datasets, then reshape the original data so that each feature becomes a separate univariate series.

In [2]:
# Number of times each metric cell below recomputes its score.
iterations = 5

# Real training windows and DDPM-generated samples for the ETTh morning-peak
# run (checkpoint "milestone_500", per the file name).
ori_data = np.load('../OUTPUT/etthmpep_energympep/samples/morning_peak_etth_norm_truth_24_train.npy')
fake_data = np.load('../OUTPUT/etthmpep_energympep/ddpm_fake_morning_peak_etth_milestone_500.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)
b, t, n = ori_data.shape

# Turn every feature channel of the real data into its own univariate series:
# (b, t, n) -> (n, b, t) -> (b * n, t, 1).
ori_data = ori_data.transpose(2, 0, 1).reshape(b * n, t, 1)

# The metric cells compare ori_data against fake_data[:ori_data.shape[0]].
# Fail here, rather than silently scoring mismatched or too few samples.
assert fake_data.shape[1:] == ori_data.shape[1:], (
    f'fake windows {fake_data.shape[1:]} do not match real windows {ori_data.shape[1:]}'
)
assert fake_data.shape[0] >= ori_data.shape[0], (
    f'need at least {ori_data.shape[0]} generated samples, got {fake_data.shape[0]}'
)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

ori shape is:  (2881, 24, 7)
fake shape is:  (20700, 24, 1)
ori shape is:  (20167, 24, 1)
fake shape is:  (20700, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

We evaluate how well a post-hoc RNN classifier can distinguish original data from synthetic data. The reported score is | classification accuracy - 0.5 |, so values closer to 0 mean the two are harder to tell apart.

- `iterations`: the number of times the metric is computed.

In [3]:
# One discriminative score is collected per run of the metric.
discriminative_score = []
n_real = ori_data.shape[0]

for run_idx in range(iterations):
    # All real series versus the first n_real generated series.
    score, acc_fake, acc_real = discriminative_score_metrics(
        ori_data[:], fake_data[:n_real]
    )
    discriminative_score += [score]
    print(f'Iter {run_idx}: ', score, ',', acc_fake, ',', acc_real, '\n')

print('etth:')
display_scores(discriminative_score)
print()
# Reference numbers noted by the author; the runs they come from are not
# recorded in this notebook.
# seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
# seed 2024  Final Score:  0.06945402298850574 ± 0.010002021083688875

# univariate Final Score:  0.004115022310361938 ± 0.003740431320553614


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Please use tf.global_variables instead.


training: 100%|██████████| 2000/2000 [00:50<00:00, 39.64it/s]


Iter 0:  0.0625929598413485 , 0.4075359444719881 , 0.717649975210709 



training: 100%|██████████| 2000/2000 [00:52<00:00, 38.44it/s]


Iter 1:  0.16596430342092217 , 0.7240951908775409 , 0.6078334159643034 



training: 100%|██████████| 2000/2000 [00:50<00:00, 39.23it/s]


Iter 2:  0.056643529995042186 , 0.39142290530490825 , 0.721864154685176 



training: 100%|██████████| 2000/2000 [00:52<00:00, 38.27it/s]


Iter 3:  0.06643529995042141 , 0.40233019335647 , 0.7305404065443728 



training: 100%|██████████| 2000/2000 [00:41<00:00, 48.64it/s]


Iter 4:  0.1601388200297471 , 0.7729300941993059 , 0.5473475458601884 

etth:
Final Score:  0.10235498264749628 ± 0.06898194088645057



## Evaluate the generated data

### 2. Predictive score

We evaluate prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, we use a post-hoc RNN architecture to predict one step ahead and report the performance in terms of mean absolute error (MAE).

The model learns to predict the last feature dimension one step ahead.

In [4]:
# One predictive score is collected per run of the metric.
predictive_score = []
for i in range(iterations):
    # All real series versus the first ori_data.shape[0] generated series.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    print(i, ' epoch: ', temp_pred, '\n')

# Label the summary with the dataset this section loads (ETTh), not the
# Sine toy dataset.
print('etth:')
display_scores(predictive_score)
print()

# Reference number noted by the author; the run it comes from is not recorded
# in this notebook.
# univariate Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [01:51<00:00, 44.97it/s]


0  epoch:  0.16629748987855095 



training: 100%|██████████| 5000/5000 [01:48<00:00, 45.90it/s]


1  epoch:  0.16708296074756201 



training: 100%|██████████| 5000/5000 [01:42<00:00, 48.88it/s]


2  epoch:  0.16649823629208071 



training: 100%|██████████| 5000/5000 [01:36<00:00, 51.87it/s]


3  epoch:  0.16528543678960672 



training: 100%|██████████| 5000/5000 [01:36<00:00, 51.57it/s]


4  epoch:  0.165198863404329 

sine:
Final Score:  0.1660725974224259 ± 0.0010078939550237146



## Data Loading ETTh Evening Peak

Load the original and generated datasets, then reshape the original data so that each feature becomes a separate univariate series.

In [5]:
# Number of times each metric cell below recomputes its score.
iterations = 5

# Real training windows and DDPM-generated samples for the ETTh evening-peak
# run (checkpoint "milestone_500", per the file name).
ori_data = np.load('../OUTPUT/etthmpep_energympep/samples/evening_peak_etth_norm_truth_24_train.npy')
fake_data = np.load('../OUTPUT/etthmpep_energympep/ddpm_fake_evening_peak_etth_milestone_500.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)
b, t, n = ori_data.shape

# Turn every feature channel of the real data into its own univariate series:
# (b, t, n) -> (n, b, t) -> (b * n, t, 1).
ori_data = ori_data.transpose(2, 0, 1).reshape(b * n, t, 1)

# The metric cells compare ori_data against fake_data[:ori_data.shape[0]].
# Fail here, rather than silently scoring mismatched or too few samples.
assert fake_data.shape[1:] == ori_data.shape[1:], (
    f'fake windows {fake_data.shape[1:]} do not match real windows {ori_data.shape[1:]}'
)
assert fake_data.shape[0] >= ori_data.shape[0], (
    f'need at least {ori_data.shape[0]} generated samples, got {fake_data.shape[0]}'
)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

ori shape is:  (2880, 24, 7)
fake shape is:  (20700, 24, 1)
ori shape is:  (20160, 24, 1)
fake shape is:  (20700, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

We evaluate how well a post-hoc RNN classifier can distinguish original data from synthetic data. The reported score is | classification accuracy - 0.5 |, so values closer to 0 mean the two are harder to tell apart.

- `iterations`: the number of times the metric is computed.

In [6]:
# Scores from each run of the discriminative metric are gathered here.
discriminative_score = []
# Generated series are truncated to the number of real series.
fake_head = fake_data[:ori_data.shape[0]]

for run_idx in range(iterations):
    run_result = discriminative_score_metrics(ori_data[:], fake_head)
    score, acc_fake, acc_real = run_result
    discriminative_score.append(score)
    print(f'Iter {run_idx}: ', score, ',', acc_fake, ',', acc_real, '\n')

print('etth:')
display_scores(discriminative_score)
print()
# Reference numbers noted by the author; the runs they come from are not
# recorded in this notebook.
# seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
# seed 2024  Final Score:  0.06945402298850574 ± 0.010002021083688875

# univariate Final Score:  0.004115022310361938 ± 0.003740431320553614


training: 100%|██████████| 2000/2000 [00:44<00:00, 44.91it/s]


Iter 0:  0.06386408730158732 , 0.4211309523809524 , 0.7065972222222222 



training: 100%|██████████| 2000/2000 [00:42<00:00, 46.86it/s]


Iter 1:  0.07539682539682535 , 0.4007936507936508 , 0.75 



training: 100%|██████████| 2000/2000 [00:41<00:00, 48.07it/s]


Iter 2:  0.07614087301587302 , 0.37028769841269843 , 0.7819940476190477 



training: 100%|██████████| 2000/2000 [00:52<00:00, 37.84it/s]


Iter 3:  0.0647321428571429 , 0.4503968253968254 , 0.6790674603174603 



training: 100%|██████████| 2000/2000 [00:48<00:00, 40.91it/s]


Iter 4:  0.0670882936507936 , 0.359375 , 0.7748015873015873 

etth:
Final Score:  0.06944444444444445 ± 0.007323992044010694



## Evaluate the generated data

### 2. Predictive score

We evaluate prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, we use a post-hoc RNN architecture to predict one step ahead and report the performance in terms of mean absolute error (MAE).

The model learns to predict the last feature dimension one step ahead.

In [7]:
# One predictive score is collected per run of the metric.
predictive_score = []
for i in range(iterations):
    # All real series versus the first ori_data.shape[0] generated series.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    print(i, ' epoch: ', temp_pred, '\n')

# Label the summary with the dataset this section loads (ETTh), not the
# Sine toy dataset.
print('etth:')
display_scores(predictive_score)
print()

# Reference number noted by the author; the run it comes from is not recorded
# in this notebook.
# univariate Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [01:46<00:00, 46.74it/s]


0  epoch:  0.13190442849686534 



training: 100%|██████████| 5000/5000 [01:37<00:00, 51.34it/s]


1  epoch:  0.13179509868067918 



training: 100%|██████████| 5000/5000 [01:36<00:00, 51.94it/s]


2  epoch:  0.13212791458997872 



training: 100%|██████████| 5000/5000 [01:43<00:00, 48.26it/s]


3  epoch:  0.13238064137008862 



training: 100%|██████████| 5000/5000 [01:40<00:00, 49.59it/s]


4  epoch:  0.13225907210008284 

sine:
Final Score:  0.13209343104753896 ± 0.0003014968877921067



## Data Loading Energy Morning Peak

Load the original and generated datasets, then reshape the original data so that each feature becomes a separate univariate series.

In [8]:
# Number of times each metric cell below recomputes its score.
iterations = 5

# Real training windows and DDPM-generated samples for the Energy morning-peak
# run (checkpoint "milestone_500", per the file name).
ori_data = np.load('../OUTPUT/etthmpep_energympep/samples/morning_peak_energy_norm_truth_24_train.npy')
fake_data = np.load('../OUTPUT/etthmpep_energympep/ddpm_fake_morning_peak_energy_milestone_500.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)
b, t, n = ori_data.shape

# Turn every feature channel of the real data into its own univariate series:
# (b, t, n) -> (n, b, t) -> (b * n, t, 1).
ori_data = ori_data.transpose(2, 0, 1).reshape(b * n, t, 1)

# The metric cells compare ori_data against fake_data[:ori_data.shape[0]].
# Fail here, rather than silently scoring mismatched or too few samples.
assert fake_data.shape[1:] == ori_data.shape[1:], (
    f'fake windows {fake_data.shape[1:]} do not match real windows {ori_data.shape[1:]}'
)
assert fake_data.shape[0] >= ori_data.shape[0], (
    f'need at least {ori_data.shape[0]} generated samples, got {fake_data.shape[0]}'
)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

ori shape is:  (2580, 24, 28)
fake shape is:  (72900, 24, 1)
ori shape is:  (72240, 24, 1)
fake shape is:  (72900, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

We evaluate how well a post-hoc RNN classifier can distinguish original data from synthetic data. The reported score is | classification accuracy - 0.5 |, so values closer to 0 mean the two are harder to tell apart.

- `iterations`: the number of times the metric is computed.

In [9]:
# One discriminative score is collected per run of the metric.
discriminative_score = []

for i in range(iterations):
    # All real series versus the first ori_data.shape[0] generated series.
    temp_disc, fake_acc, real_acc = discriminative_score_metrics(ori_data[:], fake_data[:ori_data.shape[0]])
    discriminative_score.append(temp_disc)
    print(f'Iter {i}: ', temp_disc, ',', fake_acc, ',', real_acc, '\n')

# Label the summary with the dataset this section loads (Energy), not ETTh.
print('energy:')
display_scores(discriminative_score)
print()
# Reference numbers noted by the author; the runs they come from are not
# recorded in this notebook.
# seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
# seed 2024  Final Score:  0.06945402298850574 ± 0.010002021083688875

# univariate Final Score:  0.004115022310361938 ± 0.003740431320553614


training: 100%|██████████| 2000/2000 [00:50<00:00, 39.69it/s]


Iter 0:  0.07790005537098565 , 0.720999446290144 , 0.43480066445182725 



training: 100%|██████████| 2000/2000 [00:52<00:00, 38.26it/s]


Iter 1:  0.091985049833887 , 0.6966362126245847 , 0.48733388704318936 



training: 100%|██████████| 2000/2000 [00:48<00:00, 41.57it/s]


Iter 2:  0.09831810631229232 , 0.7629429678848284 , 0.43369324473975635 



training: 100%|██████████| 2000/2000 [00:47<00:00, 42.02it/s]


Iter 3:  0.06388427464008861 , 0.6711655592469546 , 0.4566029900332226 



training: 100%|██████████| 2000/2000 [00:50<00:00, 39.39it/s]


Iter 4:  0.09361157253599117 , 0.7784468438538206 , 0.40877630121816166 

etth:
Final Score:  0.08513981173864896 ± 0.01752140785200507



## Evaluate the generated data

### 2. Predictive score

We evaluate prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, we use a post-hoc RNN architecture to predict one step ahead and report the performance in terms of mean absolute error (MAE).

The model learns to predict the last feature dimension one step ahead.

In [10]:
# One predictive score is collected per run of the metric.
predictive_score = []
for i in range(iterations):
    # All real series versus the first ori_data.shape[0] generated series.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    print(i, ' epoch: ', temp_pred, '\n')

# Label the summary with the dataset this section loads (Energy), not the
# Sine toy dataset.
print('energy:')
display_scores(predictive_score)
print()

# Reference number noted by the author; the run it comes from is not recorded
# in this notebook.
# univariate Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [01:46<00:00, 46.97it/s]


0  epoch:  0.20665932391720054 



training: 100%|██████████| 5000/5000 [01:42<00:00, 48.66it/s]


1  epoch:  0.20647256358621935 



training: 100%|██████████| 5000/5000 [01:47<00:00, 46.31it/s]


2  epoch:  0.20688947807210983 



training: 100%|██████████| 5000/5000 [01:55<00:00, 43.31it/s]


3  epoch:  0.2066399826994767 



training: 100%|██████████| 5000/5000 [01:50<00:00, 45.35it/s]


4  epoch:  0.20669701621103934 

sine:
Final Score:  0.20667167289720917 ± 0.00018508985641069327



## Data Loading Energy Evening Peak

Load the original and generated datasets, then reshape the original data so that each feature becomes a separate univariate series.

In [11]:
# Number of times each metric cell below recomputes its score.
iterations = 5

# Real training windows and DDPM-generated samples for the Energy evening-peak
# run (checkpoint "milestone_500", per the file name).
ori_data = np.load('../OUTPUT/etthmpep_energympep/samples/evening_peak_energy_norm_truth_24_train.npy')
fake_data = np.load('../OUTPUT/etthmpep_energympep/ddpm_fake_evening_peak_energy_milestone_500.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)
b, t, n = ori_data.shape

# Turn every feature channel of the real data into its own univariate series:
# (b, t, n) -> (n, b, t) -> (b * n, t, 1).
ori_data = ori_data.transpose(2, 0, 1).reshape(b * n, t, 1)

# The metric cells compare ori_data against fake_data[:ori_data.shape[0]].
# Fail here, rather than silently scoring mismatched or too few samples.
assert fake_data.shape[1:] == ori_data.shape[1:], (
    f'fake windows {fake_data.shape[1:]} do not match real windows {ori_data.shape[1:]}'
)
assert fake_data.shape[0] >= ori_data.shape[0], (
    f'need at least {ori_data.shape[0]} generated samples, got {fake_data.shape[0]}'
)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

ori shape is:  (2587, 24, 28)
fake shape is:  (72900, 24, 1)
ori shape is:  (72436, 24, 1)
fake shape is:  (72900, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

We evaluate how well a post-hoc RNN classifier can distinguish original data from synthetic data. The reported score is | classification accuracy - 0.5 |, so values closer to 0 mean the two are harder to tell apart.

- `iterations`: the number of times the metric is computed.

In [12]:
# One discriminative score is collected per run of the metric.
discriminative_score = []

for i in range(iterations):
    # All real series versus the first ori_data.shape[0] generated series.
    temp_disc, fake_acc, real_acc = discriminative_score_metrics(ori_data[:], fake_data[:ori_data.shape[0]])
    discriminative_score.append(temp_disc)
    print(f'Iter {i}: ', temp_disc, ',', fake_acc, ',', real_acc, '\n')

# Label the summary with the dataset this section loads (Energy), not ETTh.
print('energy:')
display_scores(discriminative_score)
print()
# Reference numbers noted by the author; the runs they come from are not
# recorded in this notebook.
# seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
# seed 2024  Final Score:  0.06945402298850574 ± 0.010002021083688875

# univariate Final Score:  0.004115022310361938 ± 0.003740431320553614


training: 100%|██████████| 2000/2000 [00:51<00:00, 38.84it/s]


Iter 0:  0.06149917172832686 , 0.7281198233020431 , 0.3948785201546107 



training: 100%|██████████| 2000/2000 [00:49<00:00, 40.10it/s]


Iter 1:  0.05863473219215898 , 0.6403920485919381 , 0.4768774157923799 



training: 100%|██████████| 2000/2000 [00:52<00:00, 38.08it/s]


Iter 2:  0.07509663169519598 , 0.6585450027609056 , 0.49164826062948647 



training: 100%|██████████| 2000/2000 [00:51<00:00, 38.99it/s]


Iter 3:  0.06553699613473218 , 0.6618580894533407 , 0.4692159028161237 



training: 100%|██████████| 2000/2000 [00:53<00:00, 37.73it/s]


Iter 4:  0.06039480949751519 , 0.7195610160132524 , 0.40122860298177804 

etth:
Final Score:  0.06423246824958584 ± 0.008172022332191942



## Evaluate the generated data

### 2. Predictive score

We evaluate prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, we use a post-hoc RNN architecture to predict one step ahead and report the performance in terms of mean absolute error (MAE).

The model learns to predict the last feature dimension one step ahead.

In [13]:
# One predictive score is collected per run of the metric.
predictive_score = []
for i in range(iterations):
    # All real series versus the first ori_data.shape[0] generated series.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    print(i, ' epoch: ', temp_pred, '\n')

# Label the summary with the dataset this section loads (Energy), not the
# Sine toy dataset.
print('energy:')
display_scores(predictive_score)
print()

# Reference number noted by the author; the run it comes from is not recorded
# in this notebook.
# univariate Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [01:44<00:00, 47.75it/s]


0  epoch:  0.18839867031502622 



training: 100%|██████████| 5000/5000 [01:43<00:00, 48.29it/s]


1  epoch:  0.18868218759724978 



training: 100%|██████████| 5000/5000 [01:46<00:00, 46.86it/s]


2  epoch:  0.18845623450655666 



training: 100%|██████████| 5000/5000 [01:40<00:00, 49.90it/s]


3  epoch:  0.18881878530559568 



training: 100%|██████████| 5000/5000 [01:45<00:00, 47.31it/s]


4  epoch:  0.18876217114431387 

sine:
Final Score:  0.18862360977374845 ± 0.00023174282544359662

