In [37]:
import numpy as np
import pandas as pd

import pyvinecopulib as pv
from gcimpute.helper_evaluation import get_smae
from vcimpute.generate_data import make_complete_data_matrix, mask_MCAR
from vcimpute.simulator import simulate_order0, simulate_orderk, vfunc, find
from vcimpute.util import get, make_triangular_array, get_order

In [2]:
# Number of variables in the simulated data; passed to
# make_complete_data_matrix below and used to build the vine order.
d = 15

In [3]:
# Build the complete ground-truth matrix X, then derive Xmis from it by masking.
# NOTE(review): presumably 1000 is the number of rows and 'clayton' the
# pair-copula family used for simulation — confirm against vcimpute.generate_data.
X = make_complete_data_matrix(1000, d, 'clayton', vine_structure='R')
# NOTE(review): d_mis=2 appears to limit missingness to two columns (the mask
# printed in the next cell flags only the last two) — confirm.
Xmis = mask_MCAR(X, mask_fraction=0.2, d_mis=2, monotonic_missingness=True)

In [10]:
# Per-column flag: True where the column holds at least one missing entry.
pd.isnull(Xmis).any(axis=0)

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True,  True])

In [76]:
import random

In [160]:
# Build a C-vine order (1-based variable labels): the last two variables,
# d and d-1, come first, followed by the remaining variables 1..d-2 in
# random order. In the run shown, d and d-1 are the columns with missing values.
# The leading pair is derived from d rather than hard-coded as 15 and 14, so
# the order stays a valid permutation of 1..d when d changes.
# NOTE(review): the shuffle is unseeded, so the structure differs on every
# run; seed `random` in a config cell if reproducibility is required.
rest_indices = list(range(1, d - 1))
random.shuffle(rest_indices)
structure = pv.CVineStructure(order=[d, d - 1] + rest_indices)

In [161]:
# Display the C-vine structure built in the previous cell.
structure

<pyvinecopulib.CVineStructure>
3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 
9 9 9 9 9 9 9 9 9 9 9 9 9 9 
12 12 12 12 12 12 12 12 12 12 12 12 12 
10 10 10 10 10 10 10 10 10 10 10 10 
11 11 11 11 11 11 11 11 11 11 11 
4 4 4 4 4 4 4 4 4 4 
5 5 5 5 5 5 5 5 5 
1 1 1 1 1 1 1 1 
8 8 8 8 8 8 8 
13 13 13 13 13 13 
6 6 6 6 6 
2 2 2 2 
7 7 7 
14 14 
15 

In [162]:
# Fit controls: only the Clayton family is offered, fitting uses 10 threads.
controls = pv.FitControlsVinecop(
    family_set=[pv.BicopFamily.clayton],
    num_threads=10,
)

# Re-derive the dimension from the data itself.
d = X.shape[1]

# Populate the triangular container with default-constructed bivariate
# copulas; an entry [row][col] is set for every pair with row + col <= d - 2.
pcs = make_triangular_array(d)
for row in range(d - 1):
    for col in range(d - 1 - row):
        pcs[row][col] = pv.Bicop()

In [232]:
# Two-pass imputation with the vine copula: fit on the incomplete data, fill
# column d-2, refit on the partially imputed data, then fill column d-1.
cop = pv.Vinecop(structure=structure, pair_copulas=pcs)
cop.select(Xmis, controls=controls)
Ximp = np.copy(Xmis)

col_second_last = d - 2
col_last = d - 1

# Pass 1: only the rows that are NaN in column d-2 are overwritten.
order1 = simulate_orderk(cop, Xmis, 1)
missing_second_last = np.isnan(Ximp[:, col_second_last])
Ximp[missing_second_last, col_second_last] = order1[missing_second_last]

# Pass 2: refit using the values filled in above, then fill column d-1.
cop.select(Ximp, controls=controls)
order0 = simulate_order0(cop, Ximp)
missing_last = np.isnan(Ximp[:, col_last])
Ximp[missing_last, col_last] = order0[missing_last]

In [233]:
# Score the imputation against the ground truth; variables with nothing to
# evaluate come back as NaN and are left out of the average.
smae = get_smae(x_imp=Ximp, x_true=X, x_obs=Xmis)
smae_evaluated = smae[~np.isnan(smae)]
print(f'SMAE average over all variables: {smae_evaluated.mean():.3f}')
smae

There is no entry to be evaluated in variable 0.
There is no entry to be evaluated in variable 1.
There is no entry to be evaluated in variable 2.
There is no entry to be evaluated in variable 3.
There is no entry to be evaluated in variable 4.
There is no entry to be evaluated in variable 5.
There is no entry to be evaluated in variable 6.
There is no entry to be evaluated in variable 7.
There is no entry to be evaluated in variable 8.
There is no entry to be evaluated in variable 9.
There is no entry to be evaluated in variable 10.
There is no entry to be evaluated in variable 11.
There is no entry to be evaluated in variable 12.
SMAE average over all variables: 0.089


array([       nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan, 0.06059448, 0.11750986])

In [231]:
from gcimpute.gaussian_copula import GaussianCopula

# Baseline: impute the same incomplete matrix with gcimpute's GaussianCopula
# and score it the same way.
# NOTE(review): this rebinds `Ximp` and `smae`, replacing the vine-copula
# results computed earlier in the notebook.
model = GaussianCopula()
Ximp = model.fit_transform(X=Xmis)
smae = get_smae(x_imp=Ximp, x_true=X, x_obs=Xmis)
smae_evaluated = smae[~np.isnan(smae)]
print(f'SMAE average over all variables: {smae_evaluated.mean():.3f}')

There is no entry to be evaluated in variable 0.
There is no entry to be evaluated in variable 1.
There is no entry to be evaluated in variable 2.
There is no entry to be evaluated in variable 3.
There is no entry to be evaluated in variable 4.
There is no entry to be evaluated in variable 5.
There is no entry to be evaluated in variable 6.
There is no entry to be evaluated in variable 7.
There is no entry to be evaluated in variable 8.
There is no entry to be evaluated in variable 9.
There is no entry to be evaluated in variable 10.
There is no entry to be evaluated in variable 11.
There is no entry to be evaluated in variable 12.
SMAE average over all variables: 0.078


In [124]:
# Display the per-variable SMAE array currently bound to `smae`.
smae

array([       nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan, 0.06689449, 0.08871315])