In [83]:
from matplotlib import pyplot as plt

In [84]:
import numpy as np

from gcimpute.gaussian_copula import GaussianCopula
from gcimpute.helper_data import generate_mixed_from_gc
from gcimpute.helper_evaluation import get_smae
from gcimpute.helper_mask import mask_MCAR

import pyvinecopulib as pv # https://vinecopulib.github.io/pyvinecopulib/_generate/pyvinecopulib.Vinecop.cdf.html

In [85]:
# Number of variables (columns); reused below for data generation, the vine copula and the matrix loops.
d = 6

In [86]:
# Generate n=100 samples with d continuous columns (the only variable type requested here),
# then mask entries with the MCAR helper (mask_fraction=0.3, empty rows not allowed).
X = generate_mixed_from_gc(n=100, var_types={'cont': list(range(d))})
X_mask = mask_MCAR(X, mask_fraction=0.3, allow_empty_row=False)

# Fit the Gaussian copula model on the masked data; X_imp is the fit_transform output scored below.
model = GaussianCopula(verbose=1)
X_imp = model.fit_transform(X=X_mask)

Iteration 1: copula parameter change 0.0657, likelihood -6.9793
Iteration 2: copula parameter change 0.0357, likelihood -6.9296
Iteration 3: copula parameter change 0.0229, likelihood -6.9012
Iteration 4: copula parameter change 0.0166, likelihood -6.8831
Iteration 5: copula parameter change 0.0129, likelihood -6.8705
Iteration 6: copula parameter change 0.0103, likelihood -6.8611
Iteration 7: copula parameter change 0.0084, likelihood -6.8538
Convergence achieved at iteration 7


In [87]:
# Evaluation: scaled MAE (SMAE), i.e. MAE scaled by the MAE of median imputation.
# Only the first d entries are summarised; all d generated columns are continuous.
smae = get_smae(X_imp, X, X_mask)
smae_cont = smae[:d]
print(f'The SMAE across continous variables: mean {smae_cont.mean():.3f} and std {smae_cont.std():.3f}')

The SMAE across continous variables: mean 0.738 and std 0.113


In [88]:
# Pseudo-observations are computed from the complete data X (not the masked X_mask).
u = pv.to_pseudo_obs(X)

# Create a d-dimensional vine copula and run model selection on the pseudo-observations;
# select() updates `cop` in place.
cop = pv.Vinecop(d=d)
cop.select(data=u)

In [103]:
# Display the R-vine structure held by the fitted copula.
cop.structure

<pyvinecopulib.RVineStructure>
5 3 1 6 6 6 
3 1 6 2 2 
1 6 2 1 
6 2 3 
2 5 
4 

In [107]:
# Print every entry of the structure array, labelled "(tree,edge)".
# The third argument False requests the non-natural-order labels.
rvine = cop.structure
for tree in range(d - 1):
    # Loop bound: d - 1 - tree entries are read for this tree.
    for edge in range(d - 1 - tree):
        print(f'({tree},{edge})', rvine.struct_array(tree, edge, False))

(0,0) 5
(0,1) 3
(0,2) 1
(0,3) 6
(0,4) 6
(1,0) 3
(1,1) 1
(1,2) 6
(1,3) 2
(2,0) 1
(2,1) 6
(2,2) 2
(3,0) 6
(3,1) 2
(4,0) 2


In [97]:
# Scratch allocation of the d x d matrix; the next cell re-creates T before filling it.
T = np.zeros(shape=(d,d))

In [118]:
# Build T as a left-right mirrored copy of the fitted R-vine structure:
# the main diagonal carries the reversed variable order, and structure entry
# (tree, edge) is written to row `tree`, column d - 1 - edge.
T = np.zeros((d, d))

order = cop.order[::-1]
np.fill_diagonal(T, order)

rvine = cop.structure
for tree in range(d - 1):
    for edge in range(d - 1 - tree):
        # False -> labels are not converted to natural order.
        T[tree, d - 1 - edge] = rvine.struct_array(tree, edge, False)

T

array([[6., 6., 6., 1., 3., 5.],
       [0., 2., 2., 6., 1., 3.],
       [0., 0., 1., 2., 6., 1.],
       [0., 0., 0., 3., 2., 6.],
       [0., 0., 0., 0., 5., 2.],
       [0., 0., 0., 0., 0., 4.]])

In [123]:
# natural order (not right)
T = np.zeros(shape=(d,d))

for i in range(d):
    T[i, i] = d - i

for t in range(d-1):
    for e in range(d-t-1):
        T[t, d - e - 1] = cop.structure.struct_array(t, e, True)
        
T

array([[6., 6., 6., 4., 3., 2.],
       [0., 5., 5., 6., 4., 3.],
       [0., 0., 4., 5., 6., 4.],
       [0., 0., 0., 3., 5., 6.],
       [0., 0., 0., 0., 2., 5.],
       [0., 0., 0., 0., 0., 1.]])