# Metric Presentation and Visualization
## Import necessary packages and functions

- DDPM-TS: Interpretable Diffusion for Time Series Generation
- Metrics: 
    - discriminative_metrics
    - predictive_metrics
    - visualization

In [9]:
import os
# Intended to quiet TensorFlow's native (C++) log output; it is set here, before
# `import tensorflow` further down, so the value is already in place at import time.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import sys
# NOTE: '__file__' is a quoted string literal, not the module variable, so
# os.path.dirname() returns '' and the entry appended is just '../' (relative to the
# current working directory). Presumably this makes the project's `Utils` package,
# imported later in this cell, importable from the notebook's folder — TODO confirm.
sys.path.append(os.path.join(os.path.dirname('__file__'), '../'))

import warnings
# Suppress every Python warning for the rest of the kernel session.
warnings.filterwarnings("ignore")

import numpy as np
import tensorflow as tf
# Ask TensorFlow to enable memory growth on every GPU it can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # With no GPUs the list is empty and the loop body never runs,
    # so a separate emptiness check is unnecessary.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Report the problem but let the notebook continue.
    print(err)

from Utils.metric_utils import display_scores
from Utils.discriminative_metric import discriminative_score_metrics
from Utils.predictive_metric import predictive_score_metrics

## Data Loading ETTh Morning Peak

Load original dataset and preprocess the loaded data.

In [None]:
# Evaluation settings for the ETTh morning-peak run.
iterations = 5        # how many times each metric below is recomputed
run_name = 'etth_mp'  # sub-folder of ../OUTPUT that holds this run's arrays

# For the sine toy experiment, load
#   '../toy_exp/samples/sine_ground_truth_24_train.npy' and '../toy_exp/ddpm_fake_sines.npy'
# instead of the two files below.
ori_data = np.load(f'../OUTPUT/{run_name}/samples/etth_norm_truth_24_train.npy')
fake_data = np.load(f'../OUTPUT/{run_name}/ddpm_fake_{run_name}_milestone_10.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

# The original array must be 3-D; presumably (samples, sequence length, features) — TODO confirm.
b, t, n = ori_data.shape

# Flatten the feature axis into the sample axis so every feature becomes its own
# univariate series: (b, t, n) -> (n, b, t) -> (b * n, t, 1).
ori_data = ori_data.transpose(2, 0, 1).reshape(b * n, t, 1)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

# The metric cells pass both arrays to the same scoring functions, so the per-sample
# shape is expected to agree; fail here with a clear message instead of mid-training.
assert fake_data.shape[1:] == ori_data.shape[1:], (
    f'per-sample shape mismatch: ori {ori_data.shape[1:]} vs fake {fake_data.shape[1:]}'
)

ori shape is:  (2881, 24, 7)
fake shape is:  (20384, 24, 1)
ori shape is:  (20167, 24, 1)
fake shape is:  (20384, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

Evaluates how well a post-hoc RNN classifier can distinguish original data from synthetic data. The reported score is | classification accuracy - 0.5 |, so values close to 0 mean the two are hard to tell apart.

- `iterations`: the number of times the metric computation is repeated.

In [11]:
# Collect one discriminative score per repetition, then summarise them.
discriminative_score = []

for run_idx in range(iterations):
    # Use at most as many synthetic samples as there are real ones.
    real_batch = ori_data[:]
    fake_batch = fake_data[:ori_data.shape[0]]
    # The two extra return values are presumably the classifier's accuracy on the
    # synthetic and on the real samples — TODO confirm against Utils.discriminative_metric.
    disc, acc_fake, acc_real = discriminative_score_metrics(real_batch, fake_batch)
    discriminative_score.append(disc)
    print(f'Iter {run_idx}: ', disc, ',', acc_fake, ',', acc_real, '\n')

print('etth:')
display_scores(discriminative_score)
print()

# Scores noted from earlier runs (their exact configuration is not recorded here):
#   seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
#   seed 2024   Final Score:  0.06945402298850574 ± 0.010002021083688875
#   univariate  Final Score:  0.004115022310361938 ± 0.003740431320553614


training: 100%|██████████| 2000/2000 [00:38<00:00, 51.50it/s]


Iter 0:  0.0006197322756569257 , 0.5585027268220129 , 0.44273673772930094 



training: 100%|██████████| 2000/2000 [00:39<00:00, 50.05it/s]


Iter 1:  0.0018591968269707215 , 0.5292513634110064 , 0.4670302429350521 



training: 100%|██████████| 2000/2000 [00:42<00:00, 47.25it/s]


Iter 2:  0.0006197322756569257 , 0.44992563212692116 , 0.5513138324243927 



training: 100%|██████████| 2000/2000 [00:45<00:00, 44.11it/s]


Iter 3:  0.0027268220128904286 , 0.4476945959345563 , 0.5577590480912246 



training: 100%|██████████| 2000/2000 [00:49<00:00, 40.62it/s]


Iter 4:  0.0048339117501239315 , 0.47372335151214673 , 0.5359444719881011 

etth:
Final Score:  0.0021318790282597866 ± 0.0021775605302623534



## Evaluate the generated data

### 2. Predictive score

Evaluates prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, a post-hoc RNN is trained to predict one step ahead, and its performance is reported as the mean absolute error (MAE).

The model learns to predict the last dimension with one more step.

In [12]:
# Predictive score (train on synthetic, test on real), repeated `iterations` times.
predictive_score = []
for i in range(iterations):
    # Use at most as many synthetic samples as there are real ones.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    # Each pass is an independent repetition of the metric, not a training epoch,
    # so label it the same way the discriminative-score cell does.
    print(f'Iter {i}: ', temp_pred, '\n')

# This cell scores the ETTh run loaded above, so the summary is labelled 'etth'
# (the previous 'sine' label was left over from the toy experiment).
print('etth:')
display_scores(predictive_score)
print()

# Score noted from an earlier univariate run (configuration not recorded here):
#   Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [01:47<00:00, 46.34it/s]


0  epoch:  0.16059876932274933 



training: 100%|██████████| 5000/5000 [01:38<00:00, 50.81it/s]


1  epoch:  0.16054852553923113 



training: 100%|██████████| 5000/5000 [01:42<00:00, 48.55it/s]


2  epoch:  0.16055651784944694 



training: 100%|██████████| 5000/5000 [01:36<00:00, 51.82it/s]


3  epoch:  0.16067534827721086 



training: 100%|██████████| 5000/5000 [01:33<00:00, 53.40it/s]


4  epoch:  0.1605586214076449 

sine:
Final Score:  0.16058755647925663 ± 6.55769557301541e-05



## Data Loading ETTh Evening Peak

Load original dataset and preprocess the loaded data.

In [None]:
# Evaluation settings for the ETTh evening-peak run.
iterations = 5        # how many times each metric below is recomputed
run_name = 'etth_ep'  # sub-folder of ../OUTPUT that holds this run's arrays

# For the sine toy experiment, load
#   '../toy_exp/samples/sine_ground_truth_24_train.npy' and '../toy_exp/ddpm_fake_sines.npy'
# instead of the two files below.
ori_data = np.load(f'../OUTPUT/{run_name}/samples/etth_norm_truth_24_train.npy')
fake_data = np.load(f'../OUTPUT/{run_name}/ddpm_fake_{run_name}_milestone_10.npy')

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

# The original array must be 3-D; presumably (samples, sequence length, features) — TODO confirm.
b, t, n = ori_data.shape

# Flatten the feature axis into the sample axis so every feature becomes its own
# univariate series: (b, t, n) -> (n, b, t) -> (b * n, t, 1).
ori_data = ori_data.transpose(2, 0, 1).reshape(b * n, t, 1)

print('ori shape is: ', ori_data.shape)
print('fake shape is: ', fake_data.shape)

# The metric cells pass both arrays to the same scoring functions, so the per-sample
# shape is expected to agree; fail here with a clear message instead of mid-training.
assert fake_data.shape[1:] == ori_data.shape[1:], (
    f'per-sample shape mismatch: ori {ori_data.shape[1:]} vs fake {fake_data.shape[1:]}'
)

ori shape is:  (2880, 24, 7)
fake shape is:  (20700, 24, 1)
ori shape is:  (20160, 24, 1)
fake shape is:  (20700, 24, 1)


## Evaluate the generated data

### 1. Discriminative score

Evaluates how well a post-hoc RNN classifier can distinguish original data from synthetic data. The reported score is | classification accuracy - 0.5 |, so values close to 0 mean the two are hard to tell apart.

- `iterations`: the number of times the metric computation is repeated.

In [14]:
# Collect one discriminative score per repetition, then summarise them.
discriminative_score = []

for run_idx in range(iterations):
    # Use at most as many synthetic samples as there are real ones.
    real_batch = ori_data[:]
    fake_batch = fake_data[:ori_data.shape[0]]
    # The two extra return values are presumably the classifier's accuracy on the
    # synthetic and on the real samples — TODO confirm against Utils.discriminative_metric.
    disc, acc_fake, acc_real = discriminative_score_metrics(real_batch, fake_batch)
    discriminative_score.append(disc)
    print(f'Iter {run_idx}: ', disc, ',', acc_fake, ',', acc_real, '\n')

print('etth:')
display_scores(discriminative_score)
print()

# Scores noted from earlier runs (their exact configuration is not recorded here):
#   seed 12345  Final Score:  0.0731896551724138 ± 0.005795392020461213
#   seed 2024   Final Score:  0.06945402298850574 ± 0.010002021083688875
#   univariate  Final Score:  0.004115022310361938 ± 0.003740431320553614


training: 100%|██████████| 2000/2000 [00:47<00:00, 42.51it/s]


Iter 0:  0.007316468253968256 , 0.4990079365079365 , 0.486359126984127 



training: 100%|██████████| 2000/2000 [00:43<00:00, 46.26it/s]


Iter 1:  0.018725198412698374 , 0.6309523809523809 , 0.40649801587301587 



training: 100%|██████████| 2000/2000 [00:43<00:00, 46.08it/s]


Iter 2:  0.007440476190476164 , 0.4791666666666667 , 0.5059523809523809 



training: 100%|██████████| 2000/2000 [00:44<00:00, 45.30it/s]


Iter 3:  0.003968253968253954 , 0.47197420634920634 , 0.5200892857142857 



training: 100%|██████████| 2000/2000 [00:47<00:00, 42.29it/s]


Iter 4:  0.008432539682539653 , 0.5528273809523809 , 0.46403769841269843 

etth:
Final Score:  0.00917658730158728 ± 0.006950281242411353



## Evaluate the generated data

### 2. Predictive score

Evaluates prediction performance in the train-on-synthetic, test-on-real (TSTR) setting. More specifically, a post-hoc RNN is trained to predict one step ahead, and its performance is reported as the mean absolute error (MAE).

The model learns to predict the last dimension with one more step.

In [15]:
# Predictive score (train on synthetic, test on real), repeated `iterations` times.
predictive_score = []
for i in range(iterations):
    # Use at most as many synthetic samples as there are real ones.
    temp_pred = predictive_score_metrics(ori_data, fake_data[:ori_data.shape[0]])
    predictive_score.append(temp_pred)
    # Each pass is an independent repetition of the metric, not a training epoch,
    # so label it the same way the discriminative-score cell does.
    print(f'Iter {i}: ', temp_pred, '\n')

# This cell scores the ETTh run loaded above, so the summary is labelled 'etth'
# (the previous 'sine' label was left over from the toy experiment).
print('etth:')
display_scores(predictive_score)
print()

# Score noted from an earlier univariate run (configuration not recorded here;
# the same note appears under the morning-peak section, so it may not refer to this run):
#   Final Score:  0.1605687830457955 ± 3.0256014600636085e-05

training: 100%|██████████| 5000/5000 [01:36<00:00, 51.57it/s]


0  epoch:  0.1308807371500961 



training: 100%|██████████| 5000/5000 [01:35<00:00, 52.18it/s]


1  epoch:  0.13087518834864081 



training: 100%|██████████| 5000/5000 [01:38<00:00, 50.55it/s]


2  epoch:  0.1309105058914271 



training: 100%|██████████| 5000/5000 [01:40<00:00, 49.91it/s]


3  epoch:  0.13091464250032891 



training: 100%|██████████| 5000/5000 [01:38<00:00, 50.71it/s]


4  epoch:  0.13092423811004286 

sine:
Final Score:  0.13090106240010715 ± 2.7013644546430714e-05

