In [1]:
from multiprocessing import Pool
import numpy as np
import pyvinecopulib as pv

from vcimpute.simulator import simulate_order_k
from vcimpute.helper_vinestructs import generate_r_vine_structure
from vcimpute.helper_datagen import make_complete_data_matrix, mask_MCAR
from gcimpute.helper_evaluation import get_smae

In [2]:
# Number of variables; later cells label them 1..d.
d = 10
# Fully observed reference data. The first argument (1000) is presumably the
# number of rows and 'gaussian' the generating model -- TODO confirm against
# vcimpute.helper_datagen.
X = make_complete_data_matrix(1000, d, 'gaussian', seed=10)
# Copy of X with missing entries encoded as NaN (later cells test it with
# np.isnan). Presumably 'univariate' masks a single column and 0.2 is the
# missing fraction -- verify against mask_MCAR.
X_mask = mask_MCAR(X, 'univariate', 0.2, seed=10)

In [14]:
# 0-based index of the column that contains NaNs. `.item()` only succeeds when
# exactly one column qualifies, so this doubles as a sanity check on the mask.
idx_mis = np.flatnonzero(np.isnan(X_mask).any(axis=0)).item()
# Same variable expressed as a 1-based label.
var_mis = 1 + idx_mis

In [22]:
# 1-based labels of every variable: 1, 2, ..., d.
all_vars = 1 + np.arange(d)
# Labels of the fully observed variables, in ascending order. An explicit
# order-preserving filter is used instead of a set difference because set
# iteration order is an implementation detail, and the order of this list is
# passed on to the vine-structure builder.
obs_vars = [v for v in all_vars if v != var_mis]

In [None]:
# Vine structure built from the single missing variable and the list of
# observed variables (both given as 1-based labels).
structure = generate_r_vine_structure([var_mis], obs_vars)
# Fit controls that restrict pair-copula selection to the Gaussian family.
# process1 and process2 read this name as a notebook-level global.
controls = pv.FitControlsVinecop(family_set=[pv.BicopFamily.gaussian])

In [57]:
def process1(structure, X_mask):
    """Impute the missing column using values from ``simulate_order_k``.

    A vine copula with the given ``structure`` is fitted to a copy of
    ``X_mask`` using the notebook-level ``controls``. The values returned by
    ``simulate_order_k`` are then written into the NaN entries of column
    ``idx_mis`` (also a notebook-level global).

    Parameters
    ----------
    structure
        Vine structure handed to ``pv.Vinecop``.
    X_mask
        Data array in which missing entries are NaN. It is copied first, so
        the caller's array is left untouched.

    Returns
    -------
    numpy.ndarray
        The copy of ``X_mask`` with the NaN entries of column ``idx_mis``
        replaced.
    """
    filled = np.array(X_mask, copy=True)

    vine = pv.Vinecop(structure)
    vine.select(filled, controls)

    # The trailing 0 is forwarded as-is; presumably it is the order `k`
    # expected by simulate_order_k -- verify against vcimpute.simulator.
    draws = simulate_order_k(vine, filled, 0)

    # Only overwrite the entries that were missing in the target column.
    missing = np.isnan(filled[:, idx_mis])
    filled[missing, idx_mis] = draws[missing]

    return filled

In [61]:
def process2(structure, X_mask, rng=None):
    """Impute missing entries through the (inverse) Rosenblatt transform.

    A vine copula with the given ``structure`` is fitted to a copy of
    ``X_mask`` using the notebook-level ``controls``. The Rosenblatt
    transform of the data is computed, the positions that are NaN in the data
    are replaced by uniform random draws, and the rows that contained missing
    values are mapped back with the inverse Rosenblatt transform.

    Parameters
    ----------
    structure
        Vine structure handed to ``pv.Vinecop``.
    X_mask
        Data array in which missing entries are NaN. It is copied first, so
        the caller's array is left untouched.
    rng : optional
        Source of the uniform draws; any object with a
        ``uniform(size=...)`` method works (e.g. ``np.random.default_rng(0)``
        or a ``np.random.RandomState``). Pass one to make the imputation
        reproducible. When ``None`` (the default) the global ``np.random``
        state is used, exactly as before this parameter existed.

    Returns
    -------
    numpy.ndarray
        The copy of ``X_mask`` in which every row that had a missing value is
        replaced by the inverse-transformed row.
    """
    X_imp = np.copy(X_mask)

    nan_mask = np.isnan(X_imp)
    nan_rows = nan_mask.any(axis=1)
    if not nan_rows.any():
        # Nothing to impute: skip the fit and avoid handing an empty matrix
        # to inverse_rosenblatt.
        return X_imp

    cop = pv.Vinecop(structure)
    cop.select(X_imp, controls)

    # Work on an owned copy so the in-place assignment below is always valid.
    rb = np.array(cop.rosenblatt(X_imp))

    # Boolean-mask assignment fills positions in row-major order, which is the
    # same order the previous flatten/flat-index/reshape sequence used.
    sampler = np.random if rng is None else rng
    rb[nan_mask] = sampler.uniform(size=int(nan_mask.sum()))

    X_imp[nan_rows, :] = cop.inverse_rosenblatt(rb[nan_rows, :])

    return X_imp

In [66]:
# Times one complete call; process1 refits the vine (cop.select) on every
# call, so the fit is included in the measurement.
%timeit process1(structure, X_mask)

4.31 s ± 39.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [67]:
# Times one complete call; process2 refits the vine (cop.select) on every
# call, so the fit is included in the measurement.
%timeit process2(structure, X_mask)

4.01 s ± 20.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
