In [2]:
import pandas as pd

# Small demo table with one column per kind of value exercised below:
# integers, floats, strings with missing entries, booleans, numbers with
# missing entries, and dates given as ISO strings.
raw_columns = {
    'integer': [1, 2, 1, 2, 1, 2, 3, 1],
    'float': [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.3, 0.1],
    'categorical': ['a', 'b', 'a', 'b', 'a', None, 'c', None],
    'bool': [False, True, False, True, False, False, False, False],
    'nullable': [1, None, 3, None, 5, None, 7, None],
    'datetime': [
        '2010-01-01', '2010-02-01', '2010-01-01', '2010-02-01',
        '2010-01-01', '2010-02-01', '2010-03-01', '2010-02-01',
    ],
}

# Build the frame and parse the date strings into timestamps in a single
# chain, so `data` never exists in a half-converted state.
data = pd.DataFrame(raw_columns).assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'])
)

data

Unnamed: 0,integer,float,categorical,bool,nullable,datetime
0,1,0.1,a,False,1.0,2010-01-01
1,2,0.2,b,True,,2010-02-01
2,1,0.1,a,False,3.0,2010-01-01
3,2,0.2,b,True,,2010-02-01
4,1,0.1,a,False,5.0,2010-01-01
5,2,0.2,,False,,2010-02-01
6,3,0.3,c,False,7.0,2010-03-01
7,1,0.1,,False,,2010-02-01


In [3]:
from rdt import HyperTransformer

# `np` is not imported by any visible cell, so on a fresh kernel the override
# below raised NameError. Import it here so Restart & Run All works; the
# import is a no-op if numpy was already loaded.
import numpy as np

# Start from the dtypes pandas inferred for each column, in column order.
dtypes = list(data.dtypes)

# Position 0 is the 'integer' column (the first key of the frame). Declare it
# as float64 instead of the inferred integer dtype before handing the list to
# the HyperTransformer.
dtypes[0] = np.float64
dtypes

[numpy.float64,
 dtype('float64'),
 dtype('O'),
 dtype('bool'),
 dtype('float64'),
 dtype('<M8[ns]')]

In [4]:
# Configure the transformer with:
#   - anonymize: the 'categorical' column is tagged with the 'email' category
#   - dtypes:    the per-column dtype list built above (first entry overridden)
ht = HyperTransformer(
    anonymize={'categorical': 'email'},
    dtypes=dtypes,
)

# Fit on the demo frame.
ht.fit(data)

In [5]:
# Look up the transformer that was fitted for the 'integer' column and show
# the dtype it holds; the recorded output is `float`, matching the float64
# override placed in the dtypes list.
# NOTE(review): `_transformers` is underscore-prefixed, so presumably not a
# stable public attribute of HyperTransformer - confirm before relying on it.
ht._transformers['integer'].dtype

float

In [6]:
# Run the fitted HyperTransformer over the same frame it was fitted on.
trans_test = ht.transform(data)

# Display the result. In the recorded output every column is numeric, the
# datetime column appears as large floats, and an extra 'nullable#1' column
# sits next to 'nullable'.
# NOTE(review): 'nullable#1' looks like a missing-value indicator (it is 1.0
# exactly on the rows where 'nullable' was empty) - confirm against rdt docs.
trans_test

Unnamed: 0,integer,float,categorical,bool,nullable,nullable#1,datetime
0,1,0.1,0.173133,0,1.0,0.0,1.262304e+18
1,2,0.2,0.724116,1,4.0,1.0,1.264982e+18
2,1,0.1,0.256392,0,3.0,0.0,1.262304e+18
3,2,0.2,0.778121,1,4.0,1.0,1.264982e+18
4,1,0.1,0.190051,0,5.0,0.0,1.262304e+18
5,2,0.2,0.552532,0,4.0,1.0,1.264982e+18
6,3,0.3,0.942299,0,7.0,0.0,1.267402e+18
7,1,0.1,0.513491,0,4.0,1.0,1.264982e+18


In [7]:
from copulas.multivariate import GaussianMultivariate

# Fit a GaussianMultivariate model on the transformed (all-numeric) table.
gm = GaussianMultivariate()
gm.fit(trans_test)

# Draw exactly as many synthetic rows as the transformed table contains,
# then display them.
sampled_test = gm.sample(trans_test.shape[0])
sampled_test

Unnamed: 0,integer,float,categorical,bool,nullable,nullable#1,datetime
0,2.014993,0.201499,0.68199,0.241421,4.884977,0.649483,1.265426e+18
1,0.440604,0.04406,0.180029,0.710767,0.418769,1.433112,1.261395e+18
2,3.123151,0.312315,0.80938,-0.33955,7.675415,-0.563078,1.266783e+18
3,0.876919,0.087692,0.254475,0.092358,1.267386,0.753787,1.2629e+18
4,1.265381,0.126538,0.417505,-0.326069,3.217951,0.353283,1.264263e+18
5,2.630478,0.263048,0.895418,0.285926,5.884528,0.393362,1.266967e+18
6,0.907345,0.090734,0.304349,-0.291128,1.947946,0.219023,1.263645e+18
7,1.219859,0.121986,0.28179,0.09749,2.71524,-0.109891,1.262796e+18


In [8]:
# Map the sampled numeric rows back through the fitted HyperTransformer.
# In the recorded output the frame has the original six columns again:
# 'categorical' holds e-mail addresses, 'datetime' holds timestamps, and
# 'integer' keeps fractional values (its dtype was declared as float64).
ht.reverse_transform(sampled_test)

Unnamed: 0,integer,float,categorical,bool,nullable,datetime
0,2.014993,0.201499,wayne75@horne.org,False,,2010-02-06 03:07:45.791276800
1,0.440604,0.04406,fryjason@hotmail.com,True,,2009-12-21 11:30:59.804839680
2,3.123151,0.312315,wayne75@horne.org,False,7.675415,2010-02-21 20:06:08.210644224
3,0.876919,0.087692,fryjason@hotmail.com,False,,2010-01-07 21:38:01.844807424
4,1.265381,0.126538,bvalentine@brown.com,False,3.217951,2010-01-23 16:08:49.942173440
5,2.630478,0.263048,fergusonaaron@hotmail.com,False,5.884528,2010-02-23 23:15:50.835597824
6,0.907345,0.090734,fryjason@hotmail.com,False,1.947946,2010-01-16 12:24:36.063659520
7,1.219859,0.121986,fryjason@hotmail.com,False,2.71524,2010-01-06 16:39:10.623979264
