In [1]:
from copy import deepcopy

import numpy as np
import pandas as pd

import pyvinecopulib as pv
from vcimpute.helper_datagen import make_complete_data_matrix, mask_MCAR
from vcimpute.helper_subvines import find_subvine_structures, remove_column, remove_inbetween, remove_var
from vcimpute.helper_vinestructs import relabel_vine_mat, natural_order_mat
from vcimpute.helper_diagonalize import diagonalize_copula
from vcimpute.simulator import simulate_order_k
from vcimpute.utils import make_triangular_array, is_leaf_in_all_subtrees

In [2]:
class VineCopFit:
    """Empty placeholder class; it defines no attributes or methods yet."""

In [3]:
# Generate a complete synthetic data matrix (n=1000, d=10, gaussian copula) and
# derive an incomplete copy from it with mask_MCAR (d_mis=4, mask_fraction=.1,
# monotonic missingness switched on).
# NOTE(review): no random seed is set in the visible cells, so X / Xmis
# presumably differ between runs -- confirm whether the helpers accept a seed.
X = make_complete_data_matrix(n=1000,d=10,copula_type='gaussian')
Xmis = mask_MCAR(X, d_mis=4, mask_fraction=.1, monotonic_missingness=True)
# Fit a vine copula directly on the incomplete matrix, restricting the
# pair-copula family set to gaussian and using 10 threads.
controls = pv.FitControlsVinecop(family_set=[pv.BicopFamily.gaussian], num_threads=10)
cop = pv.Vinecop(data=Xmis, controls=controls)

# Short aliases for the fitted structure matrix and pair copulas; later cells
# rebind T and pcs as variables are removed from the vine.
T = cop.matrix
pcs = cop.pair_copulas

In [4]:
# Collect the 1-based labels of the columns of Xmis that contain NaNs,
# ordered from least missing to most.
n_missing = np.count_nonzero(np.isnan(Xmis), axis=0)
miss_vars, = np.where(n_missing)
# np.where only returns the affected columns in column order, so sort them by
# their NaN count explicitly (stable sort: ties keep their column order).
miss_vars = miss_vars[np.argsort(n_missing[miss_vars], kind='stable')]
# Shift from 0-based column indices to 1-based labels and cast to uint64.
# NOTE(review): presumably this matches the labels/dtype used in cop.matrix -- confirm.
miss_vars = (miss_vars + 1).astype(np.uint64)

In [5]:

# Strip variables 10, 7 and 8 from the vine one after another; each call
# consumes the (T, pcs) pair produced by the previous one.
# NOTE(review): this rebinds T and pcs from the fit cell, so re-running this
# cell on its own operates on the already-reduced vine.
for var_label in (10, 7, 8):
    T, pcs = remove_var(T, pcs, var_label)


In [22]:
from collections import deque

def find_subvine_structures(T, pcs, var_mis):
    """Search for reduced vines in which ``var_mis`` passes the leaf check.

    Starting from ``(T, pcs)``, every pending vine is reduced once with
    ``remove_inbetween`` and once with ``remove_column``. A reduced vine for
    which ``is_leaf_in_all_subtrees`` holds is collected; otherwise it is
    queued for further reduction as long as its structure matrix still has
    more than one row.

    Note: this notebook-local definition shadows the function of the same
    name imported from ``vcimpute.helper_subvines``.

    Parameters
    ----------
    T : array with a ``shape`` attribute
        Vine structure matrix (presumably square -- TODO confirm).
    pcs : object
        Pair copulas belonging to ``T``; only passed through to the helpers.
    var_mis : object
        Variable label handed to the removal helpers and to the leaf check.

    Returns
    -------
    list of tuple
        The ``(T_cand, pcs_cand)`` pairs accepted by the leaf check, in the
        order they were found.
    """
    pending = deque([(T, pcs)])
    accepted = []
    while pending:
        # pop() takes from the right end, so the most recently queued
        # candidate is processed first (LIFO).
        T_cur, pcs_cur = pending.pop()
        for reduce_step in (remove_inbetween, remove_column):
            # NOTE(review): the meaning of the trailing 0 is defined by the
            # helpers in vcimpute.helper_subvines -- confirm.
            T_cand, pcs_cand = reduce_step(T_cur, pcs_cur, var_mis, 0)
            if is_leaf_in_all_subtrees(T_cand, var_mis):
                accepted.append((T_cand, pcs_cand))
            elif T_cand.shape[0] > 1:
                # Not accepted yet but still reducible: explore it later.
                pending.append((T_cand, pcs_cand))
    return accepted


In [25]:
from gcimpute.helper_evaluation import get_smae

In [26]:
from gcimpute.gaussian_copula import GaussianCopula

In [27]:
# Impute Xmis with gcimpute's GaussianCopula using its default constructor
# arguments; presumably the baseline the vine-based imputation is compared to.
Ximp2 = GaussianCopula().fit_transform(Xmis)

In [28]:
# Per-variable get_smae score of the GaussianCopula imputation against the
# complete data X; the recorded output is NaN for variables 0-5, which have
# no entry to evaluate.
get_smae(Ximp2, X, Xmis)

There is no entry to be evaluated in variable 0.
There is no entry to be evaluated in variable 1.
There is no entry to be evaluated in variable 2.
There is no entry to be evaluated in variable 3.
There is no entry to be evaluated in variable 4.
There is no entry to be evaluated in variable 5.


array([       nan,        nan,        nan,        nan,        nan,
              nan, 0.48894062, 0.19839905, 0.3395389 , 0.48022892])

In [29]:
# Same score for Ximp.
# NOTE(review): Ximp is not assigned in any cell visible here (execution
# counts jump from 5 to 22), so this presumably relies on a removed cell and
# may fail under Restart & Run All -- confirm.
# The recorded output has a value only for variable 8; variables 6, 7 and 9
# come out as NaN.
get_smae(Ximp, X, Xmis)

There is no entry to be evaluated in variable 0.
There is no entry to be evaluated in variable 1.
There is no entry to be evaluated in variable 2.
There is no entry to be evaluated in variable 3.
There is no entry to be evaluated in variable 4.
There is no entry to be evaluated in variable 5.


array([      nan,       nan,       nan,       nan,       nan,       nan,
             nan,       nan, 0.6589639,       nan])

In [30]:
# Does column index 9 of Ximp still contain NaNs after imputation?
np.isnan(Ximp[:, 9]).any()

True