# Metric Presentation and Visualization
## Necessary packages and function calls

- DDPM-TS: Interpretable Diffusion for Time Series Generation
- Metrics: 
    - discriminative_metrics
    - predictive_metrics
    - visualization

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import sys
sys.path.append(os.path.join(os.path.dirname('__file__'), '../'))

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import tensorflow as tf
# Turn on memory growth for every GPU that TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Iterating an empty list is a no-op, so no separate "any GPUs?" guard is needed.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Best effort: report the failure and carry on instead of aborting the notebook.
    print(e)

from Utils.metric_utils import display_scores
from Utils.discriminative_metric import discriminative_score_metrics
from Utils.predictive_metric import predictive_score_metrics

## Data Loading ETTh

Load original dataset and preprocess the loaded data.

In [8]:
# Number of times each metric in the cells below is re-run before its scores are summarised.
iterations = 5

# ETTh: ground-truth training windows and the DDPM-generated samples
# (normalised, judging by the file name).
# For the Sine toy experiment, load '../toy_exp/samples/sine_ground_truth_24_train.npy'
# and '../toy_exp/ddpm_fake_sines.npy' instead.
ori_data = np.load('../OUTPUT/etth/samples/etth_norm_truth_24_train.npy')
fake_data = np.load('../OUTPUT/etth/ddpm_fake_etth_milestone_20.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)
b, t, n = ori_data.shape

# Flatten the n channels of the real data into independent univariate series:
# (b, t, n) -> (n, b, t) -> (b * n, t, 1).
ori_data = ori_data.transpose(2, 0, 1).reshape(b * n, t, 1)

# fake_data is used exactly as loaded; fail early if its per-sample shape does not
# match the flattened real data, since the metric cells compare the two directly.
assert fake_data.shape[1:] == ori_data.shape[1:], (
    f'per-sample shape mismatch: ori {ori_data.shape[1:]} vs fake {fake_data.shape[1:]}'
)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

ori shape is:  (17397, 24, 7)
fake shape is:  (121856, 24, 1)
ori shape is:  (121779, 24, 1)
fake shape is:  (121856, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

Measures how well a post-hoc RNN classifier can tell original data from synthetic data. The reported score is $|\text{classification accuracy} - 0.5|$, so values closer to 0 mean the two are harder to distinguish.

- `iterations`: the number of times the metric computation is repeated.

In [4]:
# Discriminative score on the currently loaded data, repeated `iterations` times.
discriminative_score = []

for i in range(iterations):
    # Use at most as many synthetic samples as there are real ones.
    fake_subset = fake_data[:ori_data.shape[0]]
    temp_disc, fake_acc, real_acc = discriminative_score_metrics(ori_data[:], fake_subset)
    discriminative_score += [temp_disc]
    print(f'Iter {i}: ', temp_disc, ',', fake_acc, ',', real_acc, '\n')

# Summarise the collected scores under the dataset label.
print('etth:')
display_scores(discriminative_score)
print()

# Score notes kept from the original cell:
# seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
# seed 2024  Final Score:  0.06945402298850574 ± 0.010002021083688875

# univariate Final Score:  0.004115022310361938 ± 0.003740431320553614


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Please use tf.global_variables instead.


training: 100%|██████████| 2000/2000 [01:09<00:00, 28.73it/s]


Iter 0:  0.005501724421087206 , 0.46530629003120383 , 0.5456971588109706 



training: 100%|██████████| 2000/2000 [01:01<00:00, 32.43it/s]


Iter 1:  0.0038799474462145045 , 0.47552964361964195 , 0.5167104614879291 



training: 100%|██████████| 2000/2000 [01:01<00:00, 32.39it/s]


Iter 2:  0.0036951880440138085 , 0.4850139595992774 , 0.507595664312695 



training: 100%|██████████| 2000/2000 [01:04<00:00, 30.91it/s]


Iter 3:  0.005563310888487438 , 0.4791837740187223 , 0.5096896042043029 



training: 100%|██████████| 2000/2000 [00:58<00:00, 34.11it/s]


Iter 4:  0.006097060272622745 , 0.49679750369518805 , 0.4910083757595664 

etth:
Final Score:  0.004947446214485141 ± 0.0013481964386171375



## Evaluate the generated data

### 2. Predictive score

Evaluates prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, we use a post-hoc RNN architecture to predict one step ahead and report the performance in terms of MAE.

The model learns to predict the last feature dimension one step ahead.

In [10]:
# Predictive (train-on-synthetic, test-on-real) score, repeated `iterations` times.
predictive_score = []
for i in range(iterations):
    # Use at most as many synthetic samples as there are real ones.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    print(i, ' epoch: ', temp_pred, '\n')

# Label the summary with the dataset this section loads (ETTh); the previous
# 'sine:' label was a copy-paste leftover and mislabelled the result.
print('etth:')
display_scores(predictive_score)
print()

# Score note kept from the original cell:
# univariate Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [01:51<00:00, 45.01it/s]


0  epoch:  0.1510372686519242 



training: 100%|██████████| 5000/5000 [01:49<00:00, 45.82it/s]


1  epoch:  0.15103991161064007 



training: 100%|██████████| 5000/5000 [01:54<00:00, 43.51it/s]


2  epoch:  0.151025879426795 



training: 100%|██████████| 5000/5000 [01:49<00:00, 45.53it/s]


3  epoch:  0.1510421308262698 



training: 100%|██████████| 5000/5000 [01:52<00:00, 44.51it/s]


4  epoch:  0.1510333939462034 

sine:
Final Score:  0.1510357168923665 ± 7.933882568628435e-06



## Data Loading energy

Load original dataset and preprocess the loaded data.

In [11]:
# Number of times each metric in the cells below is re-run before its scores are summarised.
iterations = 5

# Energy: ground-truth training windows and the DDPM-generated samples
# (normalised, judging by the file name).
# For the Sine toy experiment, load '../toy_exp/samples/sine_ground_truth_24_train.npy'
# and '../toy_exp/ddpm_fake_sines.npy' instead.
ori_data = np.load('../OUTPUT/energy/samples/energy_norm_truth_24_train.npy')
fake_data = np.load('../OUTPUT/energy/ddpm_fake_energy_milestone_20.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)
b, t, n = ori_data.shape

# Flatten the n channels of the real data into independent univariate series:
# (b, t, n) -> (n, b, t) -> (b * n, t, 1).
ori_data = ori_data.transpose(2, 0, 1).reshape(b * n, t, 1)

# fake_data is used exactly as loaded; fail early if its per-sample shape does not
# match the flattened real data, since the metric cells compare the two directly.
assert fake_data.shape[1:] == ori_data.shape[1:], (
    f'per-sample shape mismatch: ori {ori_data.shape[1:]} vs fake {fake_data.shape[1:]}'
)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

ori shape is:  (19712, 24, 28)
fake shape is:  (552160, 24, 1)
ori shape is:  (551936, 24, 1)
fake shape is:  (552160, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

Measures how well a post-hoc RNN classifier can tell original data from synthetic data. The reported score is $|\text{classification accuracy} - 0.5|$, so values closer to 0 mean the two are harder to distinguish.

- `iterations`: the number of times the metric computation is repeated.

In [12]:
# Discriminative score on the currently loaded data, repeated `iterations` times.
discriminative_score = []

for i in range(iterations):
    # Use at most as many synthetic samples as there are real ones.
    temp_disc, fake_acc, real_acc = discriminative_score_metrics(ori_data[:], fake_data[:ori_data.shape[0]])
    discriminative_score.append(temp_disc)
    print(f'Iter {i}: ', temp_disc, ',', fake_acc, ',', real_acc, '\n')

# Label the summary with the dataset this section loads (energy); the previous
# 'etth:' label was a copy-paste leftover and mislabelled the result.
print('energy:')
display_scores(discriminative_score)
print()

# NOTE(review): the score notes below are identical to those in the ETTh
# discriminative cell; confirm which dataset they actually belong to.
# seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
# seed 2024  Final Score:  0.06945402298850574 ± 0.010002021083688875

# univariate Final Score:  0.004115022310361938 ± 0.003740431320553614


training: 100%|██████████| 2000/2000 [01:41<00:00, 19.63it/s]


Iter 0:  0.0007201869768452829 , 0.5399952893430445 , 0.4614450846106461 



training: 100%|██████████| 2000/2000 [01:37<00:00, 20.52it/s]


Iter 1:  0.013615610392433974 , 0.5141682066891329 , 0.5130630140957351 



training: 100%|██████████| 2000/2000 [01:41<00:00, 19.80it/s]


Iter 2:  0.018009203898974535 , 0.49825162155306735 , 0.5377667862448817 



training: 100%|██████████| 2000/2000 [01:39<00:00, 20.20it/s]


Iter 3:  0.01605246947131933 , 0.5108798057759901 , 0.5212251331666485 



training: 100%|██████████| 2000/2000 [01:36<00:00, 20.64it/s]


Iter 4:  0.007890350400405866 , 0.6519549226365184 , 0.3322643765626699 

etth:
Final Score:  0.011257564227995797 ± 0.008702961596478194



## Evaluate the generated data

### 2. Predictive score

Evaluates prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, we use a post-hoc RNN architecture to predict one step ahead and report the performance in terms of MAE.

The model learns to predict the last feature dimension one step ahead.

In [13]:
# Predictive (train-on-synthetic, test-on-real) score, repeated `iterations` times.
predictive_score = []
for i in range(iterations):
    # Use at most as many synthetic samples as there are real ones.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    print(i, ' epoch: ', temp_pred, '\n')

# Label the summary with the dataset this section loads (energy); the previous
# 'sine:' label was a copy-paste leftover and mislabelled the result.
print('energy:')
display_scores(predictive_score)
print()

# NOTE(review): this score note is identical to the one in the ETTh predictive
# cell; confirm which dataset it actually belongs to.
# univariate Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [03:09<00:00, 26.39it/s]


0  epoch:  0.18773580251039476 



training: 100%|██████████| 5000/5000 [03:04<00:00, 27.17it/s]


1  epoch:  0.1878201061747189 



training: 100%|██████████| 5000/5000 [03:04<00:00, 27.04it/s]


2  epoch:  0.18766786034212374 



training: 100%|██████████| 5000/5000 [03:10<00:00, 26.26it/s]


3  epoch:  0.18772017511864228 



training: 100%|██████████| 5000/5000 [03:06<00:00, 26.88it/s]


4  epoch:  0.18778514020183706 

sine:
Final Score:  0.18774581686954334 ± 7.319562475669171e-05

