In [None]:
from copy import deepcopy

import numpy as np
import pandas as pd

import pyvinecopulib as pv
from vcimpute.helper_datagen import make_complete_data_matrix, mask_MCAR
from vcimpute.helper_subvines import find_subvine_structures, remove_column, remove_inbetween, remove_var
from vcimpute.helper_vinestructs import relabel_vine_mat, natural_order_mat
from vcimpute.helper_diagonalize import diagonalize_copula
from vcimpute.simulator import simulate_order_k
from vcimpute.utils import make_triangular_array, is_leaf_in_all_subtrees

In [None]:
class VineCopFit:
    """Placeholder class; it currently defines no attributes or methods."""

In [None]:
# Generate a complete data matrix (n=1000, d=10, Gaussian copula type), then
# mask part of it to obtain the incomplete matrix used by the imputers below.
X = make_complete_data_matrix(
    n=1000,
    d=10,
    copula_type='gaussian',
)
Xmis = mask_MCAR(
    X,
    d_mis=4,
    mask_fraction=0.1,
    monotonic_missingness=True,
)

# Fit controls: restrict pair-copula families to Gaussian and use 10 threads.
controls = pv.FitControlsVinecop(
    family_set=[pv.BicopFamily.gaussian],
    num_threads=10,
)

In [None]:
# Fit a vine copula to the incomplete data and keep its structure matrix and
# pair copulas; the imputation loop derives its sub-vines from these.
cop_orig = pv.Vinecop(data=Xmis, controls=controls)
T_orig, pcs_orig = cop_orig.matrix, cop_orig.pair_copulas

In [None]:
# Variables with at least one missing value, as 1-based labels (column index
# + 1), ordered from least missing to most.
# The previous version only took np.where over the columns, which yields
# column order rather than order by amount of missingness.
n_mis_per_col = np.count_nonzero(np.isnan(Xmis), axis=0)
cols_mis, = np.where(n_mis_per_col > 0)
# A stable sort keeps the original column order among columns with equal
# missing counts, so already-ordered data gives the same result as before.
cols_mis = cols_mis[np.argsort(n_mis_per_col[cols_mis], kind='stable')]
miss_vars = list((cols_mis + 1).astype(np.uint64))

In [None]:
# Impute into a copy so that Xmis keeps its NaNs for the evaluation cells.
Ximp = np.copy(Xmis)

# Impute one variable at a time, in the order given by miss_vars. Columns
# imputed in earlier iterations are already filled in Ximp when later
# variables are processed.
for pos, cur_var_mis in enumerate(miss_vars):

    # Remove the variables that come later in miss_vars (not yet imputed)
    # from the fitted vine before looking for sub-vines of the current one.
    # NOTE(review): assumes remove_var returns new objects and does not
    # mutate T_orig / pcs_orig in place -- confirm against helper_subvines.
    T, pcs = T_orig, pcs_orig
    for rest_var_mis in miss_vars[pos + 1:]:
        T, pcs = remove_var(T, pcs, rest_var_mis)
    subvine_structures = find_subvine_structures(T, pcs, cur_var_mis)

    # One candidate imputation vector per usable sub-vine.
    ximp_lst = []
    for T_sub, pcs_sub in subvine_structures:
        d2 = T_sub.shape[0]
        assert is_leaf_in_all_subtrees(T_sub, cur_var_mis)

        # Relabel the variables of the sub-vine to 1..k (0 entries in the
        # matrix are skipped); np.unique returns them in ascending order.
        ordered_old_vars = [v for v in np.unique(T_sub) if v != 0]
        old_to_new = {var_old: k + 1 for k, var_old in enumerate(ordered_old_vars)}
        T_new = relabel_vine_mat(T_sub, old_to_new)
        cop_sub = pv.Vinecop(structure=pv.RVineStructure(T_new), pair_copulas=pcs_sub)

        # Columns of Ximp in the relabelled order (labels are 1-based).
        X_sub = Ximp[:, [int(var_old) - 1 for var_old in ordered_old_vars]]

        # Current variable is in the second-to-last row of the first column:
        # diagonalize the copula on it before simulating.
        if T_sub[d2 - 2, 0] == cur_var_mis:
            cop_sub_diag = diagonalize_copula(cop_sub, old_to_new[cur_var_mis])
            ximp_lst.append(simulate_order_k(cop_sub_diag, X_sub, 0))

        # Current variable is in the last row of the first column: the
        # sub-vine can be simulated from directly.
        if T_sub[d2 - 1, 0] == cur_var_mis:
            ximp_lst.append(simulate_order_k(cop_sub, X_sub, 0))

    if not ximp_lst:
        # Without this guard np.vstack fails with an opaque message.
        raise ValueError(
            f'no usable sub-vine found to impute variable {int(cur_var_mis)}'
        )

    # Rows are observations, columns are the candidate imputations.
    ximp_mat = np.vstack(ximp_lst).T
    n_avail = ximp_mat.shape[1] - np.count_nonzero(np.isnan(ximp_mat), axis=1)
    # Every row needs at least one non-NaN candidate. The comparison belongs
    # inside np.all; the old form compared the resulting bool with 0.
    assert np.all(n_avail > 0)

    idx_mis = int(cur_var_mis) - 1
    missing = np.isnan(Ximp[:, idx_mis])
    # Average the available (non-NaN) candidates per row and fill only the
    # entries that are actually missing.
    ximp = np.nansum(ximp_mat, axis=1) / n_avail
    Ximp[missing, idx_mis] = ximp[missing]

In [None]:
# Display the matrix of the vine copula that was fitted to Xmis.
T_orig

In [None]:
from gcimpute.helper_evaluation import get_smae

In [None]:
from gcimpute.gaussian_copula import GaussianCopula

In [None]:
# Baseline for comparison: impute the same incomplete matrix with gcimpute's
# GaussianCopula model.
gc_baseline = GaussianCopula()
Ximp2 = gc_baseline.fit_transform(Xmis)

In [None]:
# get_smae of the GaussianCopula baseline imputation (Ximp2), given the
# complete data X and the masked data Xmis.
get_smae(Ximp2, X, Xmis)

In [None]:
# get_smae of the vine-copula imputation (Ximp), given the complete data X
# and the masked data Xmis; compare with the baseline value above.
get_smae(Ximp, X, Xmis)