# Metric Presentation and Visualization
## Necessary packages and function calls

- DDPM-TS: Interpretable Diffusion for Time Series Generation
- Metrics: 
    - discriminative_metrics
    - predictive_metrics
    - visualization

In [1]:
# Lower TensorFlow's native log verbosity. The variable is set before
# `import tensorflow` further down so it is already in place when the
# library loads; do not move it below that import.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Make the repository's `Utils` package importable from this notebook.
# NOTE(review): '__file__' is a string literal here, so os.path.dirname()
# returns '' and the entry appended to sys.path is simply '../', which is
# resolved against the kernel's current working directory.
import sys
sys.path.append(os.path.join(os.path.dirname('__file__'), '../'))

# Suppress every Python warning for the rest of the session.
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import tensorflow as tf
# Turn on memory growth for each GPU TensorFlow can see, so memory is not
# reserved up front. With no GPU present the block is skipped entirely.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Report the problem and carry on with TensorFlow's default behaviour.
        print(e)

# Project-local metric helpers, found through the sys.path entry added above.
from Utils.metric_utils import display_scores
from Utils.discriminative_metric import discriminative_score_metrics
from Utils.predictive_metric import predictive_score_metrics

## Data Loading Morning Peak

Load original dataset and preprocess the loaded data.

In [None]:
# Number of times each metric below is recomputed before averaging.
iterations = 5

# Alternative inputs that were used previously (kept for reference, disabled):
#   Sine toy data : ori  -> '../toy_exp/samples/sine_ground_truth_24_train.npy'
#                   fake -> '../toy_exp/ddpm_fake_sines.npy'
#   Generic       : ori  -> '../OUTPUT/{dataset_name}/samples/{dataset_name}_norm_truth_{seq_length}_train.npy'
#   test_ep run   : ori  -> '../OUTPUT/test_ep/samples/etth_norm_truth_24_train.npy'
#                   fake -> '../OUTPUT/test_ep/ddpm_fake_test_ep_milestone_10.npy'

# Active inputs: ETTh morning-peak ground truth and the matching generated samples.
ori_data = np.load('../OUTPUT/etth_mpep/samples/morning_peak_etth_norm_truth_24_train.npy')
fake_data = np.load('../OUTPUT/etth_mpep/ddpm_fake_morning_peak_etth_milestone_500.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

# The real data is 3-D; presumably (sequences, time steps, features) - TODO confirm.
b, t, n = ori_data.shape

# Move the last axis to the front and flatten it into the first one, so every
# channel of every sequence becomes its own single-channel sample of shape (t, 1).
ori_data = np.transpose(ori_data, (2, 0, 1)).reshape(b * n, t, 1)

# Disabled counterpart for the generated data (the recorded run shows it is
# already stored with a trailing dimension of 1):
# fake_data = fake_data[:ori_data.shape[0]*ori_data.shape[2]]
# fake_data = fake_data.reshape(n, b, t).transpose(1, 2, 0)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

ori shape is:  (2881, 24, 7)
fake shape is:  (20384, 24, 1)
ori shape is:  (20167, 24, 1)
fake shape is:  (20384, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

This metric measures how well a post-hoc RNN classifier can tell original data from synthetic data. The reported score is |classification accuracy - 0.5|, so a value close to 0 means the two are hard to distinguish.

- metric_iteration (`iterations` in the code below): the number of times the metric computation is repeated.

In [3]:
# Collect one discriminative score per repetition, then summarise them.
discriminative_score = []

# Both inputs are identical for every repetition, so build them once.
real_batch = ori_data[:]
# Cut the synthetic set down to as many samples as the real set has.
fake_batch = fake_data[:ori_data.shape[0]]

for i in range(iterations):
    temp_disc, fake_acc, real_acc = discriminative_score_metrics(real_batch, fake_batch)
    discriminative_score.append(temp_disc)
    print(f'Iter {i}: ', temp_disc, ',', fake_acc, ',', real_acc, '\n')

print('etth:')
display_scores(discriminative_score)
print()

# Reference scores noted down by the author (apparently from other runs/settings):
#   seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
#   seed 2024   Final Score:  0.06945402298850574 ± 0.010002021083688875
#   univariate  Final Score:  0.004115022310361938 ± 0.003740431320553614


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Please use tf.global_variables instead.


training: 100%|██████████| 2000/2000 [00:48<00:00, 41.13it/s]


Iter 0:  0.020823004462072414 , 0.19583539910758552 , 0.8458106098165592 



training: 100%|██████████| 2000/2000 [00:48<00:00, 41.08it/s]


Iter 1:  0.005453644025780857 , 0.5342092216162617 , 0.47669806643529994 



training: 100%|██████████| 2000/2000 [00:48<00:00, 41.03it/s]


Iter 2:  0.001859196826970777 , 0.4970252850768468 , 0.5066931085770947 



training: 100%|██████████| 2000/2000 [00:47<00:00, 42.16it/s]


Iter 3:  0.0047099652949925686 , 0.5188398611799703 , 0.49058006941001486 



training: 100%|██████████| 2000/2000 [00:47<00:00, 42.03it/s]


Iter 4:  0.028755577590480863 , 0.22211204759543876 , 0.835399107585523 

etth:
Final Score:  0.012320277640059496 ± 0.014651071826468746



## Evaluate the generated data

### 2. Predictive score

This metric evaluates prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, a post-hoc RNN is trained to predict one step ahead, and its performance is reported as the mean absolute error (MAE).

The model learns to predict the last dimension with one more step.

In [5]:
# Collect one predictive score per repetition, then summarise them.
predictive_score = []
for i in range(iterations):
    # The synthetic set is cut down to as many samples as the real set has.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    print(i, ' epoch: ', temp_pred, '\n')

# The data evaluated here is the ETTh morning-peak set loaded above, so the
# summary is labelled 'etth:' (as in the discriminative-score cell) rather
# than the 'sine:' label left over from the toy example.
print('etth:')
display_scores(predictive_score)
print()

# Reference score noted down by the author:
#   univariate Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [01:30<00:00, 55.17it/s]


0  epoch:  0.16061335135943425 



training: 100%|██████████| 5000/5000 [01:28<00:00, 56.52it/s]


1  epoch:  0.16067238735818345 



training: 100%|██████████| 5000/5000 [01:27<00:00, 57.06it/s]


2  epoch:  0.16063763687608792 



training: 100%|██████████| 5000/5000 [01:29<00:00, 55.95it/s]


3  epoch:  0.16066406364535835 



training: 100%|██████████| 5000/5000 [01:28<00:00, 56.74it/s]


4  epoch:  0.1606531366895541 

sine:
Final Score:  0.1606481151857236 ± 2.9033610601721645e-05



## Data Loading Evening Peak

Load original dataset and preprocess the loaded data.

In [None]:
# Number of times each metric below is recomputed before averaging.
iterations = 5

# Alternative inputs that were used previously (kept for reference, disabled):
#   Sine toy data : ori  -> '../toy_exp/samples/sine_ground_truth_24_train.npy'
#                   fake -> '../toy_exp/ddpm_fake_sines.npy'
#   Generic       : ori  -> '../OUTPUT/{dataset_name}/samples/{dataset_name}_norm_truth_{seq_length}_train.npy'
#   test_ep run   : ori  -> '../OUTPUT/test_ep/samples/etth_norm_truth_24_train.npy'
#                   fake -> '../OUTPUT/test_ep/ddpm_fake_test_ep_milestone_10.npy'

# Active inputs: ETTh evening-peak ground truth and the matching generated samples.
ori_data = np.load('../OUTPUT/etth_mpep/samples/evening_peak_etth_norm_truth_24_train.npy')
fake_data = np.load('../OUTPUT/etth_mpep/ddpm_fake_evening_peak_etth_milestone_500.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

# The real data is 3-D; presumably (sequences, time steps, features) - TODO confirm.
b, t, n = ori_data.shape

# Move the last axis to the front and flatten it into the first one, so every
# channel of every sequence becomes its own single-channel sample of shape (t, 1).
ori_data = np.transpose(ori_data, (2, 0, 1)).reshape(b * n, t, 1)

# Disabled counterpart for the generated data (the recorded run shows it is
# already stored with a trailing dimension of 1):
# fake_data = fake_data[:ori_data.shape[0]*ori_data.shape[2]]
# fake_data = fake_data.reshape(n, b, t).transpose(1, 2, 0)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

ori shape is:  (2880, 24, 7)
fake shape is:  (20384, 24, 1)
ori shape is:  (20160, 24, 1)
fake shape is:  (20384, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

This metric measures how well a post-hoc RNN classifier can tell original data from synthetic data. The reported score is |classification accuracy - 0.5|, so a value close to 0 means the two are hard to distinguish.

- metric_iteration (`iterations` in the code below): the number of times the metric computation is repeated.

In [7]:
# Collect one discriminative score per repetition, then summarise them.
discriminative_score = []

# Both inputs are identical for every repetition, so build them once.
real_batch = ori_data[:]
# Cut the synthetic set down to as many samples as the real set has.
fake_batch = fake_data[:ori_data.shape[0]]

for i in range(iterations):
    temp_disc, fake_acc, real_acc = discriminative_score_metrics(real_batch, fake_batch)
    discriminative_score.append(temp_disc)
    print(f'Iter {i}: ', temp_disc, ',', fake_acc, ',', real_acc, '\n')

print('etth:')
display_scores(discriminative_score)
print()

# Reference scores noted down by the author (apparently from other runs/settings):
#   seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
#   seed 2024   Final Score:  0.06945402298850574 ± 0.010002021083688875
#   univariate  Final Score:  0.004115022310361938 ± 0.003740431320553614


training: 100%|██████████| 2000/2000 [00:43<00:00, 46.24it/s]


Iter 0:  0.011532738095238138 , 0.5674603174603174 , 0.4556051587301587 



training: 100%|██████████| 2000/2000 [00:45<00:00, 43.71it/s]


Iter 1:  0.009176587301587324 , 0.6815476190476191 , 0.3368055555555556 



training: 100%|██████████| 2000/2000 [00:49<00:00, 40.42it/s]


Iter 2:  0.01252480158730157 , 0.4623015873015873 , 0.5126488095238095 



training: 100%|██████████| 2000/2000 [00:44<00:00, 44.48it/s]


Iter 3:  0.010044642857142905 , 0.5860615079365079 , 0.4340277777777778 



training: 100%|██████████| 2000/2000 [00:44<00:00, 44.74it/s]


Iter 4:  0.012896825396825351 , 0.6748511904761905 , 0.3509424603174603 

etth:
Final Score:  0.011235119047619058 ± 0.0019796572133499515



## Evaluate the generated data

### 2. Predictive score

This metric evaluates prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, a post-hoc RNN is trained to predict one step ahead, and its performance is reported as the mean absolute error (MAE).

The model learns to predict the last dimension with one more step.

In [8]:
# Collect one predictive score per repetition, then summarise them.
predictive_score = []
for i in range(iterations):
    # The synthetic set is cut down to as many samples as the real set has.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    print(i, ' epoch: ', temp_pred, '\n')

# The data evaluated here is the ETTh evening-peak set loaded above, so the
# summary is labelled 'etth:' (as in the discriminative-score cell) rather
# than the 'sine:' label left over from the toy example.
print('etth:')
display_scores(predictive_score)
print()

# Reference score noted down by the author.
# NOTE(review): this value does not match the evening-peak result recorded for
# this cell; it looks copied from another run - confirm or update.
#   univariate Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [01:22<00:00, 60.49it/s]


0  epoch:  0.1308773325415381 



training: 100%|██████████| 5000/5000 [01:24<00:00, 59.02it/s]


1  epoch:  0.1308719117721288 



training: 100%|██████████| 5000/5000 [01:27<00:00, 57.29it/s]


2  epoch:  0.13086946294445237 



training: 100%|██████████| 5000/5000 [01:29<00:00, 55.87it/s]


3  epoch:  0.13089251388254178 



training: 100%|██████████| 5000/5000 [01:29<00:00, 55.90it/s]


4  epoch:  0.130872600166828 

sine:
Final Score:  0.1308767642614978 ± 1.1490744545096812e-05

