In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorpack.utils import logger

from tgan.data import load_data
from tgan.model import TUNABLE_VARIABLES, TGANModel
from tgan.evaluation import evaluate_classification

In [2]:
def prepare_hyperparameter_search(steps_per_epoch, num_random_search, max_epoch=1, random_state=None):
    """Draw random hyperparameter combinations for a random search.

    Every combination picks one value per entry of ``TUNABLE_VARIABLES`` and is
    then completed with the fixed training settings (``max_epoch``,
    ``steps_per_epoch`` and ``restore_session=False``). The fixed settings are
    applied last, so they win over a sampled key with the same name.

    Args:
        steps_per_epoch (int): Stored under ``'steps_per_epoch'`` in every
            combination.
        num_random_search (int): Number of combinations to draw.
        max_epoch (int): Stored under ``'max_epoch'`` in every combination.
            Defaults to 1, the value that was previously hard-coded.
        random_state (int or None): Seed for a private
            ``np.random.RandomState`` so the search can be reproduced. With
            ``None`` (the default) the global NumPy random state is used,
            exactly as before.

    Returns:
        list[dict]: ``num_random_search`` dictionaries of keyword arguments.
    """
    # Fall back to the module-level generator so unseeded calls behave as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    basic_kwargs = {
        'max_epoch': max_epoch,
        'steps_per_epoch': steps_per_epoch,
        'restore_session': False
    }

    model_kwargs = []
    for _ in range(num_random_search):
        kwargs = {name: rng.choice(choices) for name, choices in TUNABLE_VARIABLES.items()}
        kwargs.update(basic_kwargs)
        model_kwargs.append(kwargs)

    return model_kwargs

In [3]:
# --- Experiment configuration ---
# Parent directory under which each trained model writes its output
name = 'experiment'

# Input data
file_path = 'data/census-train.csv'
# NOTE(review): integer entries, presumably the positions/labels of the
# continuous columns; confirm they match the column labels pandas produces
# when the CSV is read with its default header handling.
continuous_columns = [0, 5, 16, 17, 18, 29, 38]

# Training and random-search budget
max_epoch = 1
steps_per_epoch = 5
output_epoch = 1
num_random_search = 5

# Number of rows sampled from every trained model
sample_rows = 1000

original_data = pd.read_csv(file_path)

In [4]:
# Load and split data
# train_test_split returns the partitions in (train, test) order, so the 80%
# share selected by train_size has to be bound to train_data and the
# remaining rows to test_data.
train_data, test_data = train_test_split(original_data, train_size=0.8, shuffle=True)



In [5]:
# Sample the random hyperparameter combinations used by the search
model_kwargs = prepare_hyperparameter_search(
    steps_per_epoch=steps_per_epoch,
    num_random_search=num_random_search,
)

In [6]:
# Train one TGAN model per hyperparameter combination and sample rows from it
synthesized_data = []
for index, kwargs in enumerate(model_kwargs):
    logger.info('Training TGAN Model %d/%d', index + 1, num_random_search)

    # Start every run from an empty default graph
    tf.reset_default_graph()

    # Each run writes to its own sub-directory of the experiment folder
    output_dir = '{}/model_{}'.format(name, index)
    model = TGANModel(continuous_columns, output=output_dir, **kwargs)
    model.fit(train_data)

    samples = model.sample(sample_rows)
    synthesized_data.append(samples)


[32m[0404 00:21:18 @<ipython-input-6-ebf7691fed61>:3][0m Training TGAN Model 1/5
[32m[0404 00:21:20 @input_source.py:222][0m Setting up the queue 'QueueInput/input_queue' for CPU prefetching ...
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
[32m[0404 00:21:20 @registry.py:126][0m gen/LSTM/00/FC input: [50, 400]
[32m[0404 00:21:20 @registry.py:134][0m gen/LSTM/00/FC output: [50, 600]
[32m[0404 00:21:20 @registry.py:126][0m gen/LSTM/00/FC2 input: [50, 600]
[32m[0404 00:21:20 @registry.py:134][0m gen/LSTM/00/FC2 output: [50, 1]
[32m[0404 00:21:20 @registry.py:126][0m gen/LSTM/01/FC input: [50, 400]
[32m[0404 00:21:20 @registry.py:134][0m gen/LSTM/01/FC output: [50, 600]
[32m[0404 00:21:20 @registry.py:126][0m gen/LSTM/01/FC2 input: [50, 600]
[32m[0404 00:21:20 @registry.py:134][0m gen/LSTM/01/FC2 output: [50, 5]
[32

100%|##########|5/5[00:21<00:00, 0.19it/s]

[32m[0404 00:22:17 @base.py:285][0m Epoch 1 (global_step 5) finished, time:21.7 seconds.





[32m[0404 00:22:18 @saver.py:79][0m Model saved to experiment/model_0/model/model-5.
[32m[0404 00:22:18 @monitor.py:467][0m QueueInput/queue_size: 50
[32m[0404 00:22:18 @monitor.py:467][0m train/GAN_loss/discrim/accuracy_fake: 0.4
[32m[0404 00:22:18 @monitor.py:467][0m train/GAN_loss/discrim/accuracy_real: 0.6
[32m[0404 00:22:18 @monitor.py:467][0m train/GAN_loss/discrim/loss: 0.93679
[32m[0404 00:22:18 @monitor.py:467][0m train/GAN_loss/gen/final-g-loss: 28.347
[32m[0404 00:22:18 @monitor.py:467][0m train/GAN_loss/gen/klloss: 27.156
[32m[0404 00:22:18 @monitor.py:467][0m train/GAN_loss/gen/loss: 1.1903
[32m[0404 00:22:18 @base.py:289][0m Training has finished!
[32m[0404 00:22:18 @input_source.py:178][0m EnqueueThread QueueInput/input_queue Exited.
[32m[0404 00:22:24 @collection.py:146][0m New collections created in tower : tf.GraphKeys.REGULARIZATION_LOSSES
[32m[0404 00:22:24 @collection.py:165][0m These collections were modified but restored in : (tf.GraphKeys

 10%|9         |19/200[00:09<01:27, 2.06it/s]

[32m[0404 00:22:38 @<ipython-input-6-ebf7691fed61>:3][0m Training TGAN Model 2/5





[32m[0404 00:22:40 @input_source.py:222][0m Setting up the queue 'QueueInput/input_queue' for CPU prefetching ...
[32m[0404 00:22:47 @registry.py:126][0m discrim/dis_fc2/fc input: [200, 310]
[32m[0404 00:22:47 @registry.py:134][0m discrim/dis_fc2/fc output: [200, 300]
[32m[0404 00:22:47 @registry.py:126][0m discrim/dis_fc2/fc_diversity input: [200, 300]
[32m[0404 00:22:47 @registry.py:134][0m discrim/dis_fc2/fc_diversity output: [200, 100]
[32m[0404 00:23:04 @logger.py:90][0m Argv: /home/xino/.virtualenvs/tgan_mit/lib/python3.6/site-packages/ipykernel_launcher.py -f /run/user/1000/jupyter/kernel-60e1ed5e-3e1b-45c7-a833-c63b6fba9ca2.json
[32m[0404 00:23:04 @model_utils.py:67][0m [36mList of Trainable Variables: 
[0mname                              shape           #elements
--------------------------------  ------------  -----------
gen/LSTM/go:0                     [1, 200]              200
gen/LSTM/lstm_cell/kernel:0       [1400, 2000]      2800000
gen/LSTM/lstm_cell/b

100%|##########|5/5[00:40<00:00, 0.11it/s]

[32m[0404 00:23:52 @base.py:285][0m Epoch 1 (global_step 5) finished, time:40.7 seconds.





[32m[0404 00:23:53 @saver.py:79][0m Model saved to experiment/model_1/model/model-5.
[32m[0404 00:23:53 @monitor.py:467][0m QueueInput/queue_size: 50
[32m[0404 00:23:53 @monitor.py:467][0m train/GAN_loss/discrim/accuracy_fake: 0.355
[32m[0404 00:23:53 @monitor.py:467][0m train/GAN_loss/discrim/accuracy_real: 0.62
[32m[0404 00:23:53 @monitor.py:467][0m train/GAN_loss/discrim/loss: 0.9066
[32m[0404 00:23:53 @monitor.py:467][0m train/GAN_loss/gen/final-g-loss: 22.288
[32m[0404 00:23:53 @monitor.py:467][0m train/GAN_loss/gen/klloss: 21.476
[32m[0404 00:23:53 @monitor.py:467][0m train/GAN_loss/gen/loss: 0.81233
[32m[0404 00:23:53 @base.py:289][0m Training has finished!
[32m[0404 00:23:53 @input_source.py:178][0m EnqueueThread QueueInput/input_queue Exited.
[32m[0404 00:23:59 @collection.py:146][0m New collections created in tower : tf.GraphKeys.REGULARIZATION_LOSSES
[32m[0404 00:23:59 @collection.py:165][0m These collections were modified but restored in : (tf.GraphK

  2%|2         |4/200[00:06<05:10, 0.63it/s]

[32m[0404 00:24:11 @<ipython-input-6-ebf7691fed61>:3][0m Training TGAN Model 3/5





[32m[0404 00:24:13 @input_source.py:222][0m Setting up the queue 'QueueInput/input_queue' for CPU prefetching ...
[32m[0404 00:24:19 @registry.py:126][0m discrim/dis_fc3/fc input: [100, 510]
[32m[0404 00:24:19 @registry.py:134][0m discrim/dis_fc3/fc output: [100, 500]
[32m[0404 00:24:19 @registry.py:126][0m discrim/dis_fc3/fc_diversity input: [100, 500]
[32m[0404 00:24:19 @registry.py:134][0m discrim/dis_fc3/fc_diversity output: [100, 100]
[32m[0404 00:24:34 @logger.py:90][0m Argv: /home/xino/.virtualenvs/tgan_mit/lib/python3.6/site-packages/ipykernel_launcher.py -f /run/user/1000/jupyter/kernel-60e1ed5e-3e1b-45c7-a833-c63b6fba9ca2.json
[32m[0404 00:24:34 @model_utils.py:67][0m [36mList of Trainable Variables: 
[0mname                              shape           #elements
--------------------------------  ------------  -----------
gen/LSTM/go:0                     [1, 100]              100
gen/LSTM/lstm_cell/kernel:0       [1150, 2000]      2300000
gen/LSTM/lstm_cell/b

100%|##########|5/5[00:27<00:00, 0.13it/s]

[32m[0404 00:25:10 @base.py:285][0m Epoch 1 (global_step 5) finished, time:28 seconds.





[32m[0404 00:25:11 @saver.py:79][0m Model saved to experiment/model_2/model/model-5.
[32m[0404 00:25:11 @monitor.py:467][0m QueueInput/queue_size: 50
[32m[0404 00:25:11 @monitor.py:467][0m train/GAN_loss/discrim/accuracy_fake: 0.27
[32m[0404 00:25:11 @monitor.py:467][0m train/GAN_loss/discrim/accuracy_real: 0.65
[32m[0404 00:25:11 @monitor.py:467][0m train/GAN_loss/discrim/loss: 0.97858
[32m[0404 00:25:11 @monitor.py:467][0m train/GAN_loss/gen/final-g-loss: 21.786
[32m[0404 00:25:11 @monitor.py:467][0m train/GAN_loss/gen/klloss: 21.163
[32m[0404 00:25:11 @monitor.py:467][0m train/GAN_loss/gen/loss: 0.62347
[32m[0404 00:25:11 @base.py:289][0m Training has finished!
[32m[0404 00:25:12 @input_source.py:178][0m EnqueueThread QueueInput/input_queue Exited.
[32m[0404 00:25:17 @collection.py:146][0m New collections created in tower : tf.GraphKeys.REGULARIZATION_LOSSES
[32m[0404 00:25:17 @collection.py:165][0m These collections were modified but restored in : (tf.GraphK

 18%|#8        |9/50[00:06<00:28, 1.44it/s]

[32m[0404 00:25:27 @<ipython-input-6-ebf7691fed61>:3][0m Training TGAN Model 4/5





[32m[0404 00:25:29 @input_source.py:222][0m Setting up the queue 'QueueInput/input_queue' for CPU prefetching ...
[32m[0404 00:25:48 @logger.py:90][0m Argv: /home/xino/.virtualenvs/tgan_mit/lib/python3.6/site-packages/ipykernel_launcher.py -f /run/user/1000/jupyter/kernel-60e1ed5e-3e1b-45c7-a833-c63b6fba9ca2.json
[32m[0404 00:25:48 @model_utils.py:67][0m [36mList of Trainable Variables: 
[0mname                              shape         #elements
--------------------------------  ----------  -----------
gen/LSTM/go:0                     [1, 400]            400
gen/LSTM/lstm_cell/kernel:0       [850, 800]       680000
gen/LSTM/lstm_cell/bias:0         [800]               800
gen/LSTM/00/FC/W:0                [200, 400]        80000
gen/LSTM/00/FC/b:0                [400]               400
gen/LSTM/00/FC2/W:0               [400, 1]            400
gen/LSTM/00/FC2/b:0               [1]                   1
gen/LSTM/00/attw:0                [1, 1, 1]             1
gen/LSTM/01/FC/W:0

100%|##########|5/5[00:19<00:00, 0.19it/s]

[32m[0404 00:26:15 @base.py:285][0m Epoch 1 (global_step 5) finished, time:20 seconds.





[32m[0404 00:26:15 @saver.py:79][0m Model saved to experiment/model_3/model/model-5.
[32m[0404 00:26:15 @monitor.py:467][0m QueueInput/queue_size: 50
[32m[0404 00:26:15 @monitor.py:467][0m train/GAN_loss/discrim/accuracy_fake: 0.33
[32m[0404 00:26:15 @monitor.py:467][0m train/GAN_loss/discrim/accuracy_real: 0.695
[32m[0404 00:26:15 @monitor.py:467][0m train/GAN_loss/discrim/loss: 0.89876
[32m[0404 00:26:15 @monitor.py:467][0m train/GAN_loss/gen/final-g-loss: 22.046
[32m[0404 00:26:15 @monitor.py:467][0m train/GAN_loss/gen/klloss: 21.069
[32m[0404 00:26:15 @monitor.py:467][0m train/GAN_loss/gen/loss: 0.9772
[32m[0404 00:26:15 @base.py:289][0m Training has finished!
[32m[0404 00:26:15 @input_source.py:178][0m EnqueueThread QueueInput/input_queue Exited.
[32m[0404 00:26:22 @collection.py:146][0m New collections created in tower : tf.GraphKeys.REGULARIZATION_LOSSES
[32m[0404 00:26:22 @collection.py:165][0m These collections were modified but restored in : (tf.GraphK

  8%|8         |4/50[00:03<00:35, 1.31it/s]

[32m[0404 00:26:28 @<ipython-input-6-ebf7691fed61>:3][0m Training TGAN Model 5/5





[32m[0404 00:26:30 @input_source.py:222][0m Setting up the queue 'QueueInput/input_queue' for CPU prefetching ...
[32m[0404 00:26:35 @registry.py:126][0m discrim/dis_fc4/fc input: [100, 210]
[32m[0404 00:26:35 @registry.py:134][0m discrim/dis_fc4/fc output: [100, 200]
[32m[0404 00:26:35 @registry.py:126][0m discrim/dis_fc4/fc_diversity input: [100, 200]
[32m[0404 00:26:35 @registry.py:134][0m discrim/dis_fc4/fc_diversity output: [100, 100]
[32m[0404 00:26:49 @logger.py:90][0m Argv: /home/xino/.virtualenvs/tgan_mit/lib/python3.6/site-packages/ipykernel_launcher.py -f /run/user/1000/jupyter/kernel-60e1ed5e-3e1b-45c7-a833-c63b6fba9ca2.json
[32m[0404 00:26:50 @model_utils.py:67][0m [36mList of Trainable Variables: 
[0mname                              shape           #elements
--------------------------------  ------------  -----------
gen/LSTM/go:0                     [1, 300]              300
gen/LSTM/lstm_cell/kernel:0       [1000, 1200]      1200000
gen/LSTM/lstm_cell/b

100%|##########|5/5[00:23<00:00, 0.15it/s]

[32m[0404 00:27:24 @base.py:285][0m Epoch 1 (global_step 5) finished, time:23.9 seconds.





[32m[0404 00:27:24 @saver.py:79][0m Model saved to experiment/model_4/model/model-5.
[32m[0404 00:27:24 @monitor.py:467][0m QueueInput/queue_size: 50
[32m[0404 00:27:24 @monitor.py:467][0m train/GAN_loss/discrim/accuracy_fake: 0.21
[32m[0404 00:27:24 @monitor.py:467][0m train/GAN_loss/discrim/accuracy_real: 0.74
[32m[0404 00:27:24 @monitor.py:467][0m train/GAN_loss/discrim/loss: 0.97704
[32m[0404 00:27:24 @monitor.py:467][0m train/GAN_loss/gen/final-g-loss: 19.553
[32m[0404 00:27:24 @monitor.py:467][0m train/GAN_loss/gen/klloss: 18.779
[32m[0404 00:27:24 @monitor.py:467][0m train/GAN_loss/gen/loss: 0.77378
[32m[0404 00:27:24 @base.py:289][0m Training has finished!
[32m[0404 00:27:24 @input_source.py:178][0m EnqueueThread QueueInput/input_queue Exited.
[32m[0404 00:27:30 @collection.py:146][0m New collections created in tower : tf.GraphKeys.REGULARIZATION_LOSSES
[32m[0404 00:27:30 @collection.py:165][0m These collections were modified but restored in : (tf.GraphK

  9%|9         |9/100[00:04<00:39, 2.30it/s]


In [None]:
# Write every synthesized dataset to '<position>.csv' in the working directory
for index, dataset in enumerate(synthesized_data):
    csv_name = '{}.csv'.format(index)
    dataset.to_csv(csv_name, index=False, header=True)

In [6]:
# Reload the sampled datasets from '<i>.csv' when they are not in memory yet
if 'synthesized_data' not in locals():
    synthesized_data = []
    for i in range(num_random_search):
        print(i)
        csv_name = '{}.csv'.format(i)
        reloaded = pd.read_csv(csv_name)
        synthesized_data.append(reloaded)

0
1
2
3
4


In [8]:
# Score every synthesized dataset and store the score with its hyperparameters
for index, sampled_train_data in enumerate(synthesized_data):
    print(index)
    score = evaluate_classification(sampled_train_data, test_data, continuous_columns)
    model_kwargs[index]['score'] = score

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  dataset = pd.concat([train_data, test_data])


MemoryError: 

In [2]:
import pandas as pd
import numpy as np

In [6]:
# 5x2 frame filled with the constant 7, used to try out column access
constant_block = np.full(shape=(5, 2), fill_value=7)
df = pd.DataFrame(constant_block)
df

Unnamed: 0,0,1
0,7,7
1,7,7
2,7,7
3,7,7
4,7,7


In [7]:
# Select the column whose label is 1 (label-based lookup)
df[1]

0    7
1    7
2    7
3    7
4    7
Name: 1, dtype: int64

In [11]:
# Select the second column by integer position, independent of its label
df.iloc[:, 1]

0    7
1    7
2    7
3    7
4    7
Name: 1, dtype: int64