In [1]:
import tensorflow as tf
import torch
import numpy as np
import pandas as pd
import tensorflow_probability as tfp
import matplotlib.pyplot as plt

In [2]:
from ctgan.transformer import DataTransformer
from ctgan.sampler import Sampler
from ctgan.conditional import ConditionalGenerator

In [3]:
DEMO_URL = 'http://ctgan-data.s3.amazonaws.com/census.csv.gz'
train_data = pd.read_csv(DEMO_URL, compression='gzip')
discrete_columns = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
    'income'
]

In [4]:
transformer = DataTransformer()
transformer.fit(train_data, discrete_columns)
train_data = transformer.transform(train_data)



In [5]:
data_sampler = Sampler(train_data, transformer.output_info)
data_dim = transformer.output_dimensions
cond_generator = ConditionalGenerator(
    train_data, transformer.output_info, True)

In [6]:
transformer.__dict__

{'n_clusters': 10,
 'epsilon': 0.005,
 'output_tensor': None,
 'cond_tensor': None,
 'output_info': [(1, 'tanh', 1),
  (10, 'softmax', 1),
  (9, 'softmax', 0),
  (1, 'tanh', 1),
  (9, 'softmax', 1),
  (16, 'softmax', 0),
  (1, 'tanh', 1),
  (10, 'softmax', 1),
  (7, 'softmax', 0),
  (15, 'softmax', 0),
  (6, 'softmax', 0),
  (5, 'softmax', 0),
  (2, 'softmax', 0),
  (1, 'tanh', 1),
  (5, 'softmax', 1),
  (1, 'tanh', 1),
  (3, 'softmax', 1),
  (1, 'tanh', 1),
  (10, 'softmax', 1),
  (42, 'softmax', 0),
  (2, 'softmax', 0)],
 'output_dimensions': 157,
 'dataframe': True,
 'dtypes': age                int64
 workclass         object
 fnlwgt             int64
 education         object
 education-num      int64
 marital-status    object
 occupation        object
 relationship      object
 race              object
 sex               object
 capital-gain       int64
 capital-loss       int64
 hours-per-week     int64
 native-country    object
 income            object
 dtype: object,
 'meta':

In [10]:
transformer.dtypes.values

array([dtype('int64'), dtype('O'), dtype('int64'), dtype('O'),
       dtype('int64'), dtype('O'), dtype('O'), dtype('O'), dtype('O'),
       dtype('O'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('O'), dtype('O')], dtype=object)

In [12]:
transformer.meta[0]

{'name': 'age',
 'model': BayesianGaussianMixture(covariance_prior=None, covariance_type='full',
                         degrees_of_freedom_prior=None, init_params='kmeans',
                         max_iter=100, mean_precision_prior=None,
                         mean_prior=None, n_components=10, n_init=1,
                         random_state=None, reg_covar=1e-06, tol=0.001,
                         verbose=0, verbose_interval=10, warm_start=False,
                         weight_concentration_prior=0.001,
                         weight_concentration_prior_type='dirichlet_process'),
 'components': array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True]),
 'output_info': [(1, 'tanh', 1), (10, 'softmax', 1)],
 'output_dimensions': 11}

In [18]:
import joblib
joblib.dump(transformer.__dict__, 'test.joblib')

['test.joblib']

In [20]:
joblib.load('test.joblib')

{'n_clusters': 10,
 'epsilon': 0.005,
 'output_tensor': None,
 'cond_tensor': None,
 'output_info': [(1, 'tanh', 1),
  (10, 'softmax', 1),
  (9, 'softmax', 0),
  (1, 'tanh', 1),
  (9, 'softmax', 1),
  (16, 'softmax', 0),
  (1, 'tanh', 1),
  (10, 'softmax', 1),
  (7, 'softmax', 0),
  (15, 'softmax', 0),
  (6, 'softmax', 0),
  (5, 'softmax', 0),
  (2, 'softmax', 0),
  (1, 'tanh', 1),
  (5, 'softmax', 1),
  (1, 'tanh', 1),
  (3, 'softmax', 1),
  (1, 'tanh', 1),
  (10, 'softmax', 1),
  (42, 'softmax', 0),
  (2, 'softmax', 0)],
 'output_dimensions': 157,
 'dataframe': True,
 'dtypes': age                int64
 workclass         object
 fnlwgt             int64
 education         object
 education-num      int64
 marital-status    object
 occupation        object
 relationship      object
 race              object
 sex               object
 capital-gain       int64
 capital-loss       int64
 hours-per-week     int64
 native-country    object
 income            object
 dtype: object,
 'meta':

In [22]:
from ctgan.models import Generator
transformer.generate_tensors()
generator = Generator(
            232, (256,256), transformer.output_dimensions,
            transformer.output_tensor, 0.2)

In [26]:
generator.build((512, generator._input_dim))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Mangled names are not yet supported by AutoGraph
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Mangled names are not yet supported by AutoGraph


In [28]:
generator.summary()

Model: "generator_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
residual_layer_2 (ResidualLa multiple                  60672     
_________________________________________________________________
residual_layer_3 (ResidualLa multiple                  126208    
_________________________________________________________________
gen_activation_1 (GenActivat multiple                  116965    
Total params: 303,845
Trainable params: 302,821
Non-trainable params: 1,024
_________________________________________________________________


In [32]:
generator.get_weights()

[array([[ 0.05824626, -0.01773888, -0.00658673, ...,  0.03423331,
          0.03005577, -0.01904987],
        [-0.00468313,  0.02287589,  0.01631787, ..., -0.03742629,
         -0.02813999,  0.01977238],
        [-0.02690546, -0.02492707,  0.02072357, ..., -0.02056858,
          0.03618379,  0.00820202],
        ...,
        [ 0.02840538,  0.05025441,  0.02812538, ...,  0.0639188 ,
         -0.04886543, -0.05412531],
        [ 0.0259916 ,  0.00229041,  0.00782361, ...,  0.03988334,
         -0.01539917, -0.01573783],
        [ 0.0004133 , -0.02618131, -0.05858548, ..., -0.04224579,
         -0.00418184,  0.05288434]], dtype=float32),
 array([ 0.00453439, -0.01608643, -0.03239703, -0.00422207, -0.01763561,
        -0.01083697,  0.0592666 ,  0.03377992, -0.03190537,  0.06031573,
        -0.05214811,  0.05278343,  0.00204156, -0.02296931, -0.04097619,
        -0.04338297,  0.04824249, -0.06302157, -0.04665298, -0.01239519,
         0.03784238,  0.00511236, -0.05805079,  0.02108569, -0.051

In [31]:
generator.set_weights(generator.get_weights())

In [33]:
import os
os.path.dirname('/home/pbm/pedro.xpit')

'/home/pbm'

In [34]:
transformer

<ctgan.transformer.DataTransformer at 0x7f68b357d550>

In [35]:
dt = DataTransformer()
dt.__dict__ = transformer.__dict__

In [36]:
dt

<ctgan.transformer.DataTransformer at 0x7f68b0f24b50>

In [39]:
for k,v in dt.__dict__.values():
    print(k)

TypeError: cannot unpack non-iterable int object