# DualGAN: Dual Adversarial Time Series Generation via GANs and Autoencoders

In [1]:
## Necessary packages
# The __future__ imports are no-ops on Python 3; they only matter if the
# notebook is run under Python 2.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

import tensorflow as tf
# Turn off TF2 eager execution before the model code is imported.
# NOTE(review): presumably dualgan builds a TF1-style graph/session — confirm.
tf.compat.v1.disable_eager_execution()
import logging
# Only show TensorFlow log messages of severity ERROR or higher.
tf.get_logger().setLevel(logging.ERROR)

# 1. DualGAN model
from dualgan import dualgan

# 2. Data loading
from data_loading import real_data_loading, sine_data_generation
# 3. Metrics
from metrics.discriminative_metrics import discriminative_score_metrics
from metrics.predictive_metrics import predictive_score_metrics
from metrics.visualization_metrics import visualization

## Data Loading

Load the original datasets and preprocess the loaded data. Each loading cell below appends one dataset to `original_data`.


In [2]:
# Containers filled by later cells: each data-loading cell appends one dataset
# to original_data. generated_data is not referenced by the cells shown in
# this notebook.
original_data, generated_data = [], []

In [3]:
## Data loading: stock
data_name = 'stock'
seq_len = 24  # sequence-length argument handed to real_data_loading

# Fail loudly on an unsupported name instead of skipping the load and still
# printing the "ready" message: later cells index original_data by position,
# so a silently skipped load would shift every dataset.
if data_name not in ('stock', 'electricity', 'ECG'):
  raise ValueError('Unsupported real dataset name: ' + repr(data_name))

# NOTE: this appends, so re-running the cell adds a duplicate entry.
original_data.append(real_data_loading(data_name, seq_len))

print(data_name + ' dataset is ready.')

stock dataset is ready.


In [4]:
## Data loading: sine
data_name = 'sine'
seq_len = 64  # sequence-length argument handed to sine_data_generation

# Fail loudly on an unexpected name instead of skipping the generation and
# still printing the "ready" message: later cells index original_data by
# position, so a silently skipped step would shift every dataset.
if data_name != 'sine':
  raise ValueError('This cell only generates the sine dataset, got: ' + repr(data_name))

# Set number of samples and its dimensions
no, dim = 10000, 4
# NOTE: this appends, so re-running the cell adds a duplicate entry.
original_data.append(sine_data_generation(no, seq_len, dim))

print(data_name + ' dataset is ready.')

sine dataset is ready.


In [5]:
## Data loading: ECG
data_name = 'ECG'
seq_len = 140  # sequence-length argument handed to real_data_loading

# Fail loudly on an unsupported name instead of skipping the load and still
# printing the "ready" message: later cells index original_data by position,
# so a silently skipped load would shift every dataset.
if data_name not in ('stock', 'electricity', 'ECG'):
  raise ValueError('Unsupported real dataset name: ' + repr(data_name))

# NOTE: this appends, so re-running the cell adds a duplicate entry.
original_data.append(real_data_loading(data_name, seq_len))

print(data_name + ' dataset is ready.')

ECG dataset is ready.


In [6]:
## Data loading: SWANSF
data_name = 'SWANSF'
seq_len = 60  # sequence-length argument handed to real_data_loading

# Fail loudly on an unsupported name instead of skipping the load and still
# printing the "ready" message: later cells index original_data by position,
# so a silently skipped load would shift every dataset.
# NOTE(review): this list says 'energy' where the other loading cells say
# 'electricity' — confirm which name real_data_loading actually accepts.
if data_name not in ('SWANSF', 'stock', 'energy', 'ECG'):
  raise ValueError('Unsupported real dataset name: ' + repr(data_name))

# NOTE: this appends, so re-running the cell adds a duplicate entry.
original_data.append(real_data_loading(data_name, seq_len))

print(data_name + ' dataset is ready.')

SWANSF dataset is ready.


## Set network parameters

DualGAN network parameters should be optimized for different datasets.

- hidden_dim: hidden dimensions; enter either 'same' or a number such as 8
- num_layer: number of layers
- iterations: number of training iterations
- batch_size: the number of samples in each batch

In [7]:
## Network parameters
# Settings handed to dualgan(); see the bullet list above for what each key means.
parameters = {
  'hidden_dim': 'same',
  'iterations': 7 * 1000,
  'batch_size': 128,
  'num_layer': 4,
}

## Run synthetic time-series data generation

DualGAN uses the original data and network parameters to return the generated synthetic data.

It also takes the number of samples to generate. Enter 'same' to produce as many synthetic samples as there are real samples; otherwise, enter a specific number.

In [None]:
# Run DualGAN on the first loaded dataset and keep the synthetic data it returns.
# original_data[0] is whichever dataset was appended first (the stock data when
# the loading cells are each run once, in order). 'same' asks for as many
# synthetic samples as there are real ones.
dualgan_result = dualgan(original_data[0], parameters, 'same')

Start Embedding Network Training
step: 0/7000, AE_loss: 3.3492, AE_D_loss: 1.3232


## Evaluate the generated data

### 1. Discriminative score

This score evaluates how accurately a post-hoc RNN classifier can tell original data from synthetic data. The output is |classification accuracy - 0.5|.

- metric_iteration: the number of iterations for metric computation.

In [None]:
# Number of times each metric is recomputed before averaging; the
# predictive-score cell reuses this value.
metric_iteration = 6

# One discriminative score per repetition, all on the first loaded dataset.
discriminative_score = [
  discriminative_score_metrics(original_data[0], dualgan_result)
  for _ in range(metric_iteration)
]

# Report the mean over all repetitions, rounded to four decimals.
mean_discriminative = np.round(np.mean(discriminative_score), 4)
print('Discriminative score: ' + str(mean_discriminative))

## Evaluate the generated data

### 2. Predictive score

This score evaluates prediction performance in the train-on-synthetic, test-on-real setting. More specifically, we use a post-hoc RNN architecture to predict one step ahead and report the performance in terms of mean absolute error (MAE).

In [None]:
# One predictive score per repetition; metric_iteration is set in the
# discriminative-score cell, which therefore has to run first.
predictive_score = [
  predictive_score_metrics(original_data[0], dualgan_result)
  for _ in range(metric_iteration)
]

# Report the mean over all repetitions, rounded to four decimals.
mean_predictive = np.round(np.mean(predictive_score), 4)
print('Predictive score: ' + str(mean_predictive))

## Evaluate the generated data

### 3. Visualization

We visualize the distributions of the original and synthetic data using PCA and t-SNE analysis.

In [None]:
# Compare original and synthetic data with each analysis in turn.
# NOTE(review): the fourth argument is 'Sines' (presumably a dataset label),
# but original_data[0] is the stock dataset when the loading cells run in
# order — confirm the intended value.
for analysis in ('pca', 'tsne'):
  visualization(original_data[0], dualgan_result, analysis, 'Sines')