In [1]:
from copy import deepcopy

import numpy as np
import pandas as pd

import pyvinecopulib as pv
from vcimpute.helper_datagen import make_complete_data_matrix, mask_MCAR
from vcimpute.helper_subvines import find_subvine_structures, remove_column, remove_inbetween, remove_var
from vcimpute.helper_vinestructs import relabel_vine_mat
from vcimpute.helper_diagonalize import diagonalize_copula
from vcimpute.simulator import simulate_order_k
from vcimpute.utils import make_triangular_array, is_leaf_in_all_subtrees

In [2]:
class VineCopFit:
    """Placeholder class: no attributes or methods are defined yet."""

In [3]:
# Build a complete synthetic data matrix, mask part of it, and fit a
# Gaussian-only vine copula to the masked data.
# NOTE(review): no random seed is set in this cell, so results presumably differ
# between runs unless the helpers seed internally -- confirm.
X = make_complete_data_matrix(
    n=1000,
    d=10,
    copula_type='gaussian',
)
Xmis = mask_MCAR(
    X,
    d_mis=4,
    mask_fraction=.2,
    monotonic_missingness=True,
)
controls = pv.FitControlsVinecop(
    family_set=[pv.BicopFamily.gaussian],
    num_threads=10,
)
cop = pv.Vinecop(data=Xmis, controls=controls)

In [4]:
# Pull the structure matrix and the pair copulas out of the fitted vine.
T, pcs = cop.matrix, cop.pair_copulas

In [5]:
# 1-based labels (as uint64) of every column of Xmis that holds at least one NaN.
miss_vars = (np.flatnonzero(np.isnan(Xmis).any(axis=0)) + 1).astype(np.uint64)

In [6]:
# tmp: drop variables 7, 8 and 9 from the vine, one after another.
# NOTE: T and pcs are rebound here, replacing the values read from `cop`,
# so running this cell twice applies the removal to the already-reduced vine.
for var_to_drop in (7, 8, 9):
    T, pcs = remove_var(T, pcs, var_to_drop)

In [7]:
# Display the structure matrix as it stands after the temporary removal of
# variables 7, 8 and 9.
T

array([[ 4, 10, 10,  4,  1,  4,  4],
       [10,  2,  4,  1,  4,  1,  0],
       [ 2,  4,  1,  3,  3,  0,  0],
       [ 1,  1,  3, 10,  0,  0,  0],
       [ 6,  3,  2,  0,  0,  0,  0],
       [ 3,  6,  0,  0,  0,  0,  0],
       [ 5,  0,  0,  0,  0,  0,  0]], dtype=uint64)

In [9]:
# Impute the missing entries of Xmis one variable at a time. For each variable,
# every sub-vine returned by find_subvine_structures contributes one simulated
# column; the imputed value is the NaN-ignoring mean of those columns.
Ximp = np.copy(Xmis)

# miss_vars holds ascending 1-based labels, so [::-1] starts with the largest one.
for var_mis in miss_vars[::-1]:
    subvine_structures = find_subvine_structures(T, pcs, var_mis)

    ximp = []

    for Tsub, pcs_sub in subvine_structures:
        d2 = Tsub.shape[0]
        # The target variable must sit in one of the last two rows of column 0.
        assert (Tsub[d2 - 1, 0] == var_mis) or (Tsub[d2 - 2, 0] == var_mis)

        # Relabel the sub-vine's variables to 1..k in ascending order of their
        # old labels; 0 entries of the matrix are not variables and are skipped.
        labels = np.unique(Tsub)
        ordered_old_vars = labels[labels != 0]
        old_to_new = {var_old: k + 1 for k, var_old in enumerate(ordered_old_vars)}
        Tnew = relabel_vine_mat(Tsub, old_to_new)

        # Use a dedicated name so the fitted vine bound to `cop` in the earlier
        # cell is not overwritten (re-running `T = cop.matrix` stays valid).
        cop_sub = pv.Vinecop(structure=pv.RVineStructure(Tnew), pair_copulas=pcs_sub)

        # Columns of Ximp in new-label order (labels are 1-based, columns 0-based).
        Ximp2 = Ximp[:, [int(var_old) - 1 for var_old in ordered_old_vars]]

        if (Tsub[d2 - 2, 0] == var_mis) and (Tsub[d2 - 2, 1] == var_mis):
            # NOTE(review): presumably diagonalize_copula rearranges the vine so the
            # target can be simulated -- confirm in vcimpute.helper_diagonalize.
            cop_sub = diagonalize_copula(cop_sub, cop_sub.order[1])
            ximp.append(simulate_order_k(cop_sub, Ximp2, 0))

        if Tsub[d2 - 1, 0] == var_mis:
            ximp.append(simulate_order_k(cop_sub, Ximp2, 0))

    if not ximp:
        # np.vstack([]) would raise a ValueError with an unhelpful message.
        raise ValueError(f"no sub-vine produced a simulation for variable {var_mis}")

    # One column per contributing sub-vine (assumes each simulation is 1-D -- confirm).
    ximp = np.vstack(ximp).T
    n_avail = ximp.shape[1] - np.count_nonzero(np.isnan(ximp), axis=1)
    # Element-wise check; the previous `np.all(n_avail) > 0` compared a bool to 0.
    assert np.all(n_avail > 0)
    idx_mis = int(var_mis) - 1
    missing = np.isnan(Ximp[:, idx_mis])
    # Mean over the non-NaN simulations of each row that needs imputing.
    Ximp[missing, idx_mis] = np.nansum(ximp[missing], axis=1) / n_avail[missing]
    # NOTE(review): only the first variable visited is imputed. This looks
    # temporary and paired with the "# tmp" removal of variables 7-9 above;
    # drop both together once the full loop is supported.
    break

In [13]:
# True if column index 9 (variable 10 in the 1-based labels) still contains a NaN.
np.isnan(Ximp[:, 9]).any()

False