In [8]:
import numpy as np
import pandas as pd
from vcimpute.helper_datagen import make_complete_data_matrix, mask_MCAR
from vcimpute.zeisberger import VineCopFit
from vcimpute import helper_mdp
from vcimpute.helper_vinestructs import generate_r_vine_structure
from vcimpute.utils import get
from gcimpute.helper_evaluation import get_smae
from gcimpute.gaussian_copula import GaussianCopula

X = make_complete_data_matrix(1000, 10, 'gaussian', seed=23, vine_structure='R')

In [9]:
X_mis = mask_MCAR(X, 'monotone', 0.10, seed=23, n_cols=4)

In [10]:
def smae_mean(X_imp, X, X_mis):
    smae = get_smae(X_imp, X, X_mis)
    return np.mean(smae[~np.isnan(smae)])

In [15]:
model = VineCopFit(['gaussian'], 10, True, 42)

In [11]:
X_imp1 = model.fit_transform(X_mis)
smae_mean(X_imp1, X, X_mis)

IndexError: index 1 is out of bounds for axis 0 with size 1

In [16]:
import pyvinecopulib as pv
cop_orig = pv.Vinecop(data=X_mis, controls=model.controls)
T_orig = cop_orig.matrix
pcs_orig = cop_orig.pair_copulas
d_orig = T_orig.shape[0]

X_imp = np.copy(X_mis)

In [22]:
import logging

import numpy as np
import pyvinecopulib as pv

from vcimpute.helper_diagonalize import diagonalize_copula
from vcimpute.helper_mdp import all_mdps, sort_mdps_by_increasing_missing_vars, mdp_coords, count_missing_by_col
from vcimpute.helper_subvines import find_subvine_structures, remove_var
from vcimpute.helper_vinestructs import generate_r_vine_structure, relabel_vine_matrix
from vcimpute.simulator import simulate_order_k
from vcimpute.utils import get, bicop_family_map, make_triangular_array, is_leaf_in_all_subtrees

n_miss_by_col = count_missing_by_col(X_imp)
miss_vars = 1 + np.where(n_miss_by_col)[0]
miss_vars = list(miss_vars[n_miss_by_col[np.where(n_miss_by_col)[0]].argsort()])
miss_coords = np.where(np.any(np.isnan(X_imp), axis=1))[0]

for cur_var_mis in miss_vars:
    # remove as-yet missing values
    T, pcs = T_orig, pcs_orig
    for rest_var_mis in miss_vars[(miss_vars.index(cur_var_mis) + 1):]:
        T, pcs = remove_var(T, pcs, rest_var_mis)
    subvine_structures = find_subvine_structures(T, pcs, cur_var_mis)

    # collect cur_var_mis imputed values per sub-vine structure
    ximp_lst = []
    for T_sub, pcs_sub in subvine_structures:
        imputed = False
        d_sub = T_sub.shape[0]
        assert is_leaf_in_all_subtrees(T_sub, cur_var_mis)

        # relabel indices
        ordered_old_vars = filter(lambda x: x != 0, np.unique(T_sub))
        old_to_new = {var_old: k + 1 for k, var_old in enumerate(ordered_old_vars)}
        new_to_old = {v: k for k, v in old_to_new.items()}
        T_sub_relabel = relabel_vine_matrix(T_sub, old_to_new)
        cop_sub = pv.Vinecop(structure=pv.RVineStructure(T_sub_relabel), pair_copulas=pcs_sub)
        X_imp_sub = X_imp[:, [int(new_to_old[i + 1] - 1) for i in range(len(new_to_old))]]

        if T_sub[d_sub - 2, 0] == cur_var_mis:
            cop_sub_diag = diagonalize_copula(cop_sub, old_to_new[cur_var_mis])
            ximp_lst.append(simulate_order_k(cop_sub_diag, X_imp_sub, 0))
            imputed = True

        if T_sub[d_sub - 1, 0] == cur_var_mis:
            ximp_lst.append(simulate_order_k(cop_sub, X_imp_sub, 0))
            imputed = True

        # only keep the last imputation since it uses all available information
        if imputed and (d_sub == d_orig):
            ximp_lst = ximp_lst[-1]
            break

    # average imputations
    ximp_mat = np.vstack(ximp_lst).T
    ximp_mat = ximp_mat[miss_coords]
    n_avail = ximp_mat.shape[1] - np.count_nonzero(np.isnan(ximp_mat), axis=1)
    assert np.all(n_avail) > 0

    # insert imputed values back
    idx_mis = int(cur_var_mis - 1)
    ximp = np.nansum(ximp_mat, axis=1) / n_avail
    X_imp[miss_coords, idx_mis] = ximp

IndexError: index 1 is out of bounds for axis 0 with size 1

In [36]:
miss_coords

array([  1,   3,   6,   9,  12,  15,  16,  17,  18,  21,  23,  36,  39,
        42,  53,  64,  66,  74,  78,  80,  85,  88,  89,  92,  97,  98,
       104, 106, 107, 109, 114, 118, 120, 126, 128, 131, 141, 145, 151,
       154, 158, 159, 160, 163, 169, 174, 181, 197, 199, 200, 202, 203,
       212, 217, 221, 233, 248, 258, 262, 264, 265, 279, 282, 286, 287,
       288, 289, 308, 313, 314, 318, 335, 340, 342, 343, 348, 363, 367,
       369, 387, 400, 410, 416, 420, 433, 436, 440, 441, 443, 445, 447,
       448, 451, 459, 464, 470, 473, 476, 484, 493, 496, 507, 516, 517,
       524, 533, 535, 539, 548, 554, 557, 564, 566, 568, 572, 573, 595,
       598, 599, 600, 608, 610, 614, 625, 630, 643, 646, 653, 654, 659,
       662, 665, 680, 681, 682, 687, 690, 691, 696, 703, 706, 713, 715,
       717, 719, 723, 726, 727, 735, 737, 742, 745, 747, 749, 750, 751,
       755, 759, 761, 773, 774, 785, 788, 797, 798, 808, 811, 818, 824,
       825, 826, 832, 833, 837, 845, 846, 851, 857, 866, 868, 87

In [48]:
np.concatenate([ximp])

AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [44]:
np.vstack(ximp_lst)[miss_coords]

array([[3.60980201e-01],
       [1.27049073e-01],
       [2.65198231e-01],
       [9.85978063e-01],
       [6.78466887e-01],
       [9.40115879e-01],
       [9.98370905e-01],
       [5.59589491e-02],
       [2.29851894e-02],
       [9.64127320e-01],
       [8.18575180e-01],
       [7.04975686e-01],
       [9.95385701e-01],
       [9.57928610e-01],
       [4.88127812e-01],
       [9.00196247e-01],
       [9.07483607e-01],
       [6.47254456e-03],
       [9.98021147e-01],
       [7.81554395e-01],
       [2.16284063e-01],
       [3.00310824e-01],
       [4.58830393e-04],
       [7.53825836e-01],
       [8.36510258e-01],
       [3.63475852e-01],
       [5.08142103e-01],
       [2.87118251e-01],
       [2.61751072e-01],
       [2.93808573e-01],
       [7.87815691e-01],
       [4.33255745e-01],
       [7.71783906e-02],
       [7.68245249e-01],
       [1.78705752e-01],
       [4.73729029e-02],
       [6.15380075e-02],
       [5.13043240e-01],
       [8.57300321e-01],
       [2.22727250e-02],


In [40]:
ximp_mat

array([[1.48932624e-01, 3.60980201e-01, 7.96844696e-01, 1.27049073e-01,
        7.61135784e-01, 8.15039512e-02, 2.65198231e-01, 6.05699086e-01,
        4.92276481e-01, 9.85978063e-01, 7.83340492e-01, 9.75993951e-02,
        6.78466887e-01, 9.96101879e-01, 1.96715702e-01, 9.40115879e-01,
        9.98370905e-01, 5.59589491e-02, 2.29851894e-02, 8.36068373e-01,
        8.16952710e-01, 9.64127320e-01, 4.89717061e-01, 8.18575180e-01,
        9.60790149e-01, 9.91831748e-01, 5.24461110e-01, 6.87389818e-01,
        4.37996597e-01, 3.46171305e-01, 4.03302545e-01, 8.33979208e-01,
        1.69719285e-01, 8.43524049e-01, 7.80997427e-01, 8.66363257e-01,
        7.04975686e-01, 4.62713009e-01, 1.18136758e-01, 9.95385701e-01,
        6.06201269e-02, 2.52965370e-01, 9.57928610e-01, 3.69239608e-02,
        2.11213223e-01, 3.02912362e-02, 2.98090761e-02, 2.45488049e-01,
        8.32831808e-01, 7.32196394e-02, 9.36574835e-01, 5.97333022e-01,
        5.60407578e-01, 4.88127812e-01, 1.22855809e-01, 6.434706

In [33]:
idx_mis=5

In [34]:
X_imp[miss_coords, idx_mis]

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, na

In [30]:
count_missing_by_col(X_imp)[6]

210

In [18]:
model.impute(X_imp, [], T_orig, pcs_orig, d_orig)

IndexError: index 1 is out of bounds for axis 0 with size 1