# Metric Presentation and Visualization
## Necessary packages and function calls

- DDPM-TS: Interpretable Diffusion for Time Series Generation
- Metrics: 
    - discriminative_metrics
    - predictive_metrics
    - visualization

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import sys
sys.path.append(os.path.join(os.path.dirname('__file__'), '../'))

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import tensorflow as tf
# Ask TensorFlow to enable memory growth on every visible GPU.
# With no GPU present the list is empty and the loop body never runs.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Report the failure but keep the notebook running.
    print(err)

from Utils.metric_utils import display_scores
from Utils.discriminative_metric import discriminative_score_metrics
from Utils.predictive_metric import predictive_score_metrics

## Data Loading Morning Peak

Load original dataset and preprocess the loaded data.

In [2]:
# Number of times each metric in the cells below is recomputed.
iterations = 5

# Morning-peak energy experiment: ground-truth training windows and the
# samples generated at milestone 10 (per the file names).
# To evaluate another dataset, point both paths at the matching files, e.g.
#   '../OUTPUT/{dataset_name}/samples/{dataset_name}_norm_truth_{seq_length}_train.npy'
ori_data = np.load('../OUTPUT/energy_mp/samples/energy_norm_truth_24_train.npy')
fake_data = np.load('../OUTPUT/energy_mp/ddpm_fake_energy_mp_milestone_10.npy')
print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

# presumably (windows, time steps, feature channels) -- TODO confirm
b, t, n = ori_data.shape

# Turn every feature channel into its own univariate series:
# (b, t, n) -> (n, b, t) -> (b * n, t, 1).
ori_data = ori_data.transpose(2, 0, 1).reshape(b * n, t, 1)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

# The generated samples are used exactly as loaded, so they must already be
# univariate windows with the same length as the reshaped originals.
# Fail here rather than deep inside the metric code.
if fake_data.ndim != 3 or fake_data.shape[1:] != ori_data.shape[1:]:
    raise ValueError(
        f'fake_data must have shape (N, {t}, 1) to match ori_data, '
        f'got {fake_data.shape}'
    )

ori shape is:  (2580, 24, 28)
fake shape is:  (72900, 24, 1)
ori shape is:  (72240, 24, 1)
fake shape is:  (72900, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

Evaluates how well a post-hoc RNN classifier can distinguish original data from synthetic data. The reported score is | classification accuracy - 0.5 |.

- `iterations`: the number of iterations for metric computation.

In [3]:
# Discriminative score for the morning-peak energy data, computed
# `iterations` times and then summarised by `display_scores`.
# The summary label used to read 'etth:' although the energy_mp files are
# loaded; output saved from earlier runs still shows the old label until this
# cell is re-run.
dataset_label = 'energy_mp'  # keep in sync with the files loaded above

discriminative_score = []

for i in range(iterations):
    # Cap the generated set at the number of real samples.
    temp_disc, fake_acc, real_acc = discriminative_score_metrics(
        ori_data, fake_data[:ori_data.shape[0]]
    )
    discriminative_score.append(temp_disc)
    print(f'Iter {i}: ', temp_disc, ',', fake_acc, ',', real_acc, '\n')

print(f'{dataset_label}:')
display_scores(discriminative_score)
print()

# Scores noted from earlier runs (the dataset/configuration they belong to is
# not recorded here):
# seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
# seed 2024  Final Score:  0.06945402298850574 ± 0.010002021083688875
# univariate Final Score:  0.004115022310361938 ± 0.003740431320553614


Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Please use tf.global_variables instead.


training: 100%|██████████| 2000/2000 [00:44<00:00, 44.53it/s]


Iter 0:  0.003149224806201556 , 0.36766334440753046 , 0.6260382059800664 



training: 100%|██████████| 2000/2000 [00:57<00:00, 34.97it/s]


Iter 1:  0.046061738648947914 , 0.780592469545958 , 0.311531007751938 



training: 100%|██████████| 2000/2000 [00:54<00:00, 36.86it/s]


Iter 2:  0.03990171650055374 , 0.5728820598006644 , 0.506921373200443 



training: 100%|██████████| 2000/2000 [00:50<00:00, 39.23it/s]


Iter 3:  0.026543466223698742 , 0.48096622369878184 , 0.5721207087486158 



training: 100%|██████████| 2000/2000 [00:50<00:00, 39.36it/s]


Iter 4:  0.010035991140642353 , 0.3855897009966777 , 0.6344822812846068 

etth:
Final Score:  0.02513842746400886 ± 0.02297348865897968



## Evaluate the generated data

### 2. Predictive score

Evaluates prediction performance in the train-on-synthetic, test-on-real setting. More specifically, we use a post-hoc RNN architecture to predict one step ahead and report the performance in terms of MAE.

The model learns to predict the last dimension one step ahead.

In [4]:
# Predictive score for the morning-peak energy data, computed `iterations`
# times and then summarised by `display_scores`.
# The summary label used to read 'sine:' although the energy_mp files are
# loaded; output saved from earlier runs still shows the old label until this
# cell is re-run.
dataset_label = 'energy_mp'  # keep in sync with the files loaded above

predictive_score = []
for i in range(iterations):
    # Cap the generated set at the number of real samples.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    print(i, ' epoch: ', temp_pred, '\n')

print(f'{dataset_label}:')
display_scores(predictive_score)
print()

# Score noted from an earlier run (configuration not recorded here):
# univariate Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [01:48<00:00, 46.24it/s]


0  epoch:  0.20374195240924642 



training: 100%|██████████| 5000/5000 [01:49<00:00, 45.49it/s]


1  epoch:  0.20384439943762353 



training: 100%|██████████| 5000/5000 [01:44<00:00, 47.89it/s]


2  epoch:  0.2036666798299636 



training: 100%|██████████| 5000/5000 [01:44<00:00, 48.00it/s]


3  epoch:  0.20406861899718998 



training: 100%|██████████| 5000/5000 [01:41<00:00, 49.12it/s]


4  epoch:  0.20370057546696121 

sine:
Final Score:  0.20380444522819693 ± 0.00020122783072334054



## Data Loading Evening Peak

Load original dataset and preprocess the loaded data.

In [5]:
# Number of times each metric in the cells below is recomputed.
iterations = 5

# Evening-peak energy experiment: ground-truth training windows and the
# samples generated at milestone 10 (per the file names).
# To evaluate another dataset, point both paths at the matching files, e.g.
#   '../OUTPUT/{dataset_name}/samples/{dataset_name}_norm_truth_{seq_length}_train.npy'
ori_data = np.load('../OUTPUT/energy_ep/samples/energy_norm_truth_24_train.npy')
fake_data = np.load('../OUTPUT/energy_ep/ddpm_fake_energy_ep_milestone_10.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

# presumably (windows, time steps, feature channels) -- TODO confirm
b, t, n = ori_data.shape

# Turn every feature channel into its own univariate series:
# (b, t, n) -> (n, b, t) -> (b * n, t, 1).
ori_data = ori_data.transpose(2, 0, 1).reshape(b * n, t, 1)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

# The generated samples are used exactly as loaded, so they must already be
# univariate windows with the same length as the reshaped originals.
# Fail here rather than deep inside the metric code.
if fake_data.ndim != 3 or fake_data.shape[1:] != ori_data.shape[1:]:
    raise ValueError(
        f'fake_data must have shape (N, {t}, 1) to match ori_data, '
        f'got {fake_data.shape}'
    )

ori shape is:  (2587, 24, 28)
fake shape is:  (72900, 24, 1)
ori shape is:  (72436, 24, 1)
fake shape is:  (72900, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

Evaluates how well a post-hoc RNN classifier can distinguish original data from synthetic data. The reported score is | classification accuracy - 0.5 |.

- `iterations`: the number of iterations for metric computation.

In [6]:
# Discriminative score for the evening-peak energy data, computed
# `iterations` times and then summarised by `display_scores`.
# The summary label used to read 'etth:' although the energy_ep files are
# loaded; output saved from earlier runs still shows the old label until this
# cell is re-run.
dataset_label = 'energy_ep'  # keep in sync with the files loaded above

discriminative_score = []

for i in range(iterations):
    # Cap the generated set at the number of real samples.
    temp_disc, fake_acc, real_acc = discriminative_score_metrics(
        ori_data, fake_data[:ori_data.shape[0]]
    )
    discriminative_score.append(temp_disc)
    print(f'Iter {i}: ', temp_disc, ',', fake_acc, ',', real_acc, '\n')

print(f'{dataset_label}:')
display_scores(discriminative_score)
print()

# Scores noted from earlier runs (the dataset/configuration they belong to is
# not recorded here):
# seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
# seed 2024  Final Score:  0.06945402298850574 ± 0.010002021083688875
# univariate Final Score:  0.004115022310361938 ± 0.003740431320553614


training: 100%|██████████| 2000/2000 [00:48<00:00, 41.15it/s]


Iter 0:  0.007903092214246232 , 0.24068194367752624 , 0.7751242407509663 



training: 100%|██████████| 2000/2000 [00:48<00:00, 40.89it/s]


Iter 1:  0.021431529541689698 , 0.5210519050248481 , 0.5218111540585312 



training: 100%|██████████| 2000/2000 [00:46<00:00, 43.42it/s]


Iter 2:  0.05059359469906133 , 0.8112230811706239 , 0.28996410822749863 



training: 100%|██████████| 2000/2000 [00:47<00:00, 42.23it/s]


Iter 3:  0.03502898950855882 , 0.6336278299282164 , 0.43643014908890115 



training: 100%|██████████| 2000/2000 [00:49<00:00, 40.44it/s]


Iter 4:  0.0123895637769188 , 0.043898398674765325 , 0.9808807288790723 

etth:
Final Score:  0.025469353948094974 ± 0.02167204014599465



## Evaluate the generated data

### 2. Predictive score

Evaluates prediction performance in the train-on-synthetic, test-on-real setting. More specifically, we use a post-hoc RNN architecture to predict one step ahead and report the performance in terms of MAE.

The model learns to predict the last dimension one step ahead.

In [None]:
# Predictive score for the evening-peak energy data, computed `iterations`
# times and then summarised by `display_scores`.
# The summary label used to read 'sine:' although the energy_ep files are
# loaded.
dataset_label = 'energy_ep'  # keep in sync with the files loaded above

predictive_score = []
for i in range(iterations):
    # Cap the generated set at the number of real samples.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    print(i, ' epoch: ', temp_pred, '\n')

print(f'{dataset_label}:')
display_scores(predictive_score)
print()

# Score noted from an earlier run (configuration not recorded here):
# univariate Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training:   9%|▊         | 433/5000 [00:08<01:28, 51.88it/s]