In [101]:
from random import choice
import numpy as np
from matplotlib import pyplot as plt
import pyvinecopulib as pv

def get_ced_cing(T, cop=None):
    """Walk the entries above the anti-diagonal of ``T``, column by column.

    With ``d = T.shape[1]``, column ``j`` has its anti-diagonal entry in row
    ``d-j-1``. For every row ``i`` above it, one entry is appended to each of
    three parallel lists:

    * ``ced``   -- the sorted two-element list ``[T[i, j], T[d-j-1, j]]``
    * ``cing``  -- the sorted list of ``T[0, j] ... T[i-1, j]``
    * ``param`` -- ``cop.get_parameters(i, j)[0][0]`` (only when ``cop`` is given)

    Presumably ``ced``/``cing`` are the conditioned pair and the conditioning
    set of the vine edge stored at ``(i, j)`` -- confirm against the matrix
    convention of the library in use.

    Args:
        T: 2-D array indexed as ``T[row, col]``; rows are addressed with
            ``T.shape[1]``, so a square array is assumed.
        cop: optional object exposing ``get_parameters(row, col)``.

    Returns:
        Tuple ``(ced, cing, param)``; ``param`` is empty when ``cop`` is None.
    """
    n_cols = T.shape[1]
    conditioned, conditioning, params = [], [], []
    for col in range(n_cols):
        diag_row = n_cols - col - 1
        for row in range(diag_row):
            pair = [T[row, col], T[diag_row, col]]
            pair.sort()
            conditioned.append(pair)
            # Everything stored above `row` in the same column.
            conditioning.append(sorted(T[above, col] for above in range(row)))
            if cop is not None:
                params.append(cop.get_parameters(row, col)[0][0])
    return conditioned, conditioning, params

def find(a, cing_len, ced, cing):
    """Return the partner of ``a`` in the first matching pair of ``ced``.

    A pair ``ced[k]`` matches when it contains ``a`` and the parallel entry
    ``cing[k]`` has exactly ``cing_len`` elements. Pairs are scanned in list
    order and the first match wins.

    Args:
        a: value to look for inside the pairs.
        cing_len: required length of the parallel ``cing`` entry.
        ced: list of two-element sequences.
        cing: list parallel to ``ced``.

    Returns:
        The element of the matching pair that is not ``a``.

    Raises:
        AssertionError: if no pair matches.
    """
    hit = next(
        (k for k, pair in enumerate(ced)
         if a in pair and len(cing[k]) == cing_len),
        None,
    )
    assert hit is not None, f'bad argument, a={a}, cing_len={cing_len}'
    pair = ced[hit]
    if pair[0] == a:
        return pair[1]
    return pair[0]

def diagonalize(T1, a):
    """Rebuild the matrix ``T1`` so that ``a`` occupies entry ``[d-1, 0]``.

    If ``a`` is already at ``T1[d-1, 0]`` the input array itself is returned.
    Otherwise ``a`` must be at ``T1[d-2, 0]``; a new ``uint64`` array of the
    same shape is filled column by column from the pairs produced by
    ``get_ced_cing(T1)``.

    Args:
        T1: square 2-D array with the anti-diagonal layout read by
            ``get_ced_cing``.
        a: value that should end up in the bottom-left entry.

    Returns:
        ``T1`` unchanged (same object) or a newly allocated ``uint64`` array.

    Raises:
        AssertionError: if ``a`` is in neither ``T1[d-1, 0]`` nor
            ``T1[d-2, 0]``, or if ``find`` cannot locate a required pair.
    """
    d = T1.shape[1]
    if a == T1[d-1, 0]:
        return T1
    assert a == T1[d-2, 0], f'cannot be diagonalized with {a}'

    T2 = np.zeros(shape=T1.shape, dtype=np.uint64)
    T2[d-1, 0] = a
    order = [a]

    ced, cing, _ = get_ced_cing(T1)
    for j in range(d-1):
        # Anti-diagonal entry of column j; its partners fill the rows above.
        diag = T2[d-j-1, j]
        for i in range(d-j-1):
            T2[i, j] = find(diag, i, ced, cing)

        # Drop every pair that involves a value already placed on the
        # anti-diagonal. The survivors are kept in their original list order:
        # `find` returns the first match, so the filtered lists must not
        # depend on set iteration order.
        keep_idx = [
            idx for idx, pair in enumerate(ced)
            if not any(k in pair for k in order)
        ]
        ced = [ced[idx] for idx in keep_idx]
        cing = [cing[idx] for idx in keep_idx]

        # The last partner of column j becomes the anti-diagonal entry of
        # column j+1.
        T2[d-j-2, j+1] = T2[d-j-2, j]
        order.append(T2[d-j-2, j+1])
    return T2

def make_diagonal_copula(cop1, a):
    """Build a new vine copula whose matrix is ``diagonalize(cop1.matrix, a)``.

    Each pair copula of the result is created as a Gaussian ``pv.Bicop`` whose
    single parameter is looked up in ``cop1``: the sorted pair formed by the
    anti-diagonal entry and the entry at ``(tree, edge)`` of the new matrix is
    located in the ``ced`` list of ``get_ced_cing(cop1.matrix, cop1)``, and the
    parameter stored at the same position is reused.

    The family is always set to Gaussian and only the first parameter of each
    original pair copula is read, regardless of what ``cop1`` contains.

    Args:
        cop1: vine copula exposing ``matrix`` and ``get_parameters``.
        a: variable label that must end up in the bottom-left matrix entry.

    Returns:
        A ``pv.Vinecop`` built from the rearranged matrix.
    """
    source_matrix = cop1.matrix
    dim = source_matrix.shape[0]
    ced, _, param = get_ced_cing(source_matrix, cop1)

    target_matrix = diagonalize(source_matrix, a)

    def gaussian_for(tree, edge):
        # Same pair as in the source vine, identified by its sorted labels.
        pair = sorted((target_matrix[dim-1-edge, edge], target_matrix[tree, edge]))
        return pv.Bicop(
            family=pv.BicopFamily.gaussian,
            parameters=[param[ced.index(pair)]]
        )

    pair_copulas = [
        [gaussian_for(tree, edge) for edge in range(dim-1-tree)]
        for tree in range(dim-1)
    ]
    return pv.Vinecop(matrix=target_matrix, pair_copulas=pair_copulas)

In [81]:
# Experiment settings used by the cells below.
d = 5      # dimension (number of variables in the vine)
n = 1_000  # number of simulated rows
p = 0.5    # dropout: per-row probability that the chosen variable is masked

In [82]:
# Random structure on d variables. Tree t holds d-t-1 Gaussian pair copulas,
# each with a Beta(2, 2) draw clipped to [0.05, 0.95] as its parameter.
structure = pv.RVineStructure.simulate(d=d)
pair_copulas = [
    [
        pv.Bicop(
            family=pv.BicopFamily.gaussian,
            parameters=[[np.clip(np.random.beta(2, 2), a_min=0.05, a_max=0.95)]],
        )
        for _ in range(d - t - 1)
    ]
    for t in range(d - 1)
]

In [83]:
# Reference vine copula assembled from the simulated structure and the
# randomly parameterised Gaussian pair copulas built above.
cop1 = pv.Vinecop(structure, pair_copulas)

In [84]:
# Sample from the reference copula; presumably an (n, d) array with one
# column per variable -- TODO confirm against the library documentation.
U = cop1.simulate(n)

In [85]:
# Imputable variables: the labels stored in the last two rows of column 0 of
# cop1.matrix. One of the two is picked at random as the variable to mask.
m1, m2 = (cop1.matrix[row, 0] for row in (d-1, d-2))
varm = int(choice((m1, m2)))

In [86]:
# One Bernoulli(p) draw per row; True marks a row whose value will be masked.
missing = np.random.binomial(n=1, p=p, size=n).astype(bool)

In [87]:
# Work on a copy so U keeps the complete sample; variable labels are 1-based,
# hence the column index varm - 1.
U_mask = np.array(U, copy=True)
U_mask[missing, varm - 1] = np.nan

In [95]:
# Create a d-dimensional vine and run select() on the masked sample with the
# candidate families restricted to Gaussian.
# NOTE(review): U_mask still contains NaN in column varm - 1 at this point;
# confirm that select() treats missing values the way this experiment needs.
cop2 = pv.Vinecop(d)
controls = pv.FitControlsVinecop(family_set=[pv.BicopFamily.gaussian])
cop2.select(U_mask, controls)

In [104]:
# Rearrange the fitted vine so that varm sits in the bottom-left matrix entry.
# NOTE(review): varm was drawn from cop1.matrix, whereas diagonalize() checks
# it against the matrix of the copula passed in (cop2 here). The call raises
# AssertionError when varm is in neither row d-1 nor row d-2 of column 0 of
# cop2.matrix -- confirm the fitted structure is expected to satisfy that.
cop3 = make_diagonal_copula(cop2, varm)

In [115]:
# Sanity check: the masked variable must be the bottom-left entry of the
# rearranged matrix.
assert cop3.matrix[d-1,0] == varm

In [135]:
# Display the masked sample; NaN marks the entries that were blanked out.
U_mask

array([[0.62785246, 0.67997501, 0.1155818 , 0.19804429, 0.17941567],
       [0.70812204, 0.80497653, 0.70144601, 0.88029561, 0.43188006],
       [       nan, 0.42064845, 0.37777043, 0.42560062, 0.89175307],
       ...,
       [0.74437821, 0.32503605, 0.8990098 , 0.89825667, 0.85742635],
       [0.45573825, 0.11605396, 0.83568417, 0.63302122, 0.19959692],
       [       nan, 0.70257734, 0.90723468, 0.94963991, 0.9515546 ]])

In [153]:
# Display the rearranged matrix; the hard-coded indices in the next cell
# refer to positions in this array.
cop3.matrix

array([[4, 4, 4, 4, 4],
       [5, 2, 2, 2, 0],
       [2, 5, 5, 0, 0],
       [3, 3, 0, 0, 0],
       [1, 0, 0, 0, 0]], dtype=uint64)

In [154]:
# Hand-unrolled chain of hinv2 calls down column 0 of cop3, using the pair
# copulas at (d-2, 0), (d-3, 0), (d-4, 0) and (d-5, 0) in that order.
# Each hinv2 input has two columns: the first is the previous result (fresh
# uniform draws for inv1), the second is a nested hfunc1/hfunc2 expression
# built from columns of U_mask. Matrix entries are used as 1-based column
# labels, hence the "- 1" in every U_mask lookup.
# The trailing "x;y,z" comments are the author's labels for each intermediate.
# NOTE(review): every (tree, edge) index and every hfunc1/hfunc2 choice is
# hard-coded for d == 5 and presumably for the cop3.matrix displayed above;
# confirm before rerunning with another dimension or structure.
# NOTE(review): the draws come from np.random.uniform without a seed being
# set anywhere in the visible cells, so results differ between runs.
inv1 = cop3.get_pair_copula(d-2,0).hinv2(
    np.hstack([
        np.random.uniform(size=n)[:, None],
        # NOTE(review): with d == 5 this is the same pair copula, (3, 0), as
        # the outer get_pair_copula(d-2, 0) being inverted -- confirm it is the
        # intended one for combining "3;5,4" with "2;5,4".
        cop3.get_pair_copula(3, 0).hfunc2(
            np.hstack([
                cop3.get_pair_copula(2,1).hfunc2(
                    np.hstack([
                        cop3.get_pair_copula(0,1).hfunc2(
                            np.hstack([
                                U_mask[:, int(cop3.matrix[3,1] - 1)][:, None], 
                                U_mask[:, int(cop3.matrix[0,1] - 1)][:, None]
                            ])
                        )[:, None], # 3;4
                        cop3.get_pair_copula(0,2).hfunc2(
                            np.hstack([
                                U_mask[:, int(cop3.matrix[2,2] - 1)][:, None], 
                                U_mask[:, int(cop3.matrix[0,2] - 1)][:, None]
                            ])
                        )[:, None]  # 5;4
                    ])
                )[:, None], # 3;5,4
                # NOTE(review): the two stacked inputs below are in the
                # opposite order to the get_pair_copula(1, 2).hfunc1 call used
                # for inv2, yet both are labelled "2;5,4" -- confirm which
                # order is intended.
                cop3.get_pair_copula(1,2).hfunc1(
                    np.hstack([
                        cop3.get_pair_copula(0,3).hfunc2(
                            np.hstack([
                                U_mask[:, int(cop3.matrix[1,3] - 1)][:, None], 
                                U_mask[:, int(cop3.matrix[0,3] - 1)][:, None]
                            ])
                        )[:, None], #2,4
                        cop3.get_pair_copula(0,2).hfunc2(
                            np.hstack([
                                U_mask[:, int(cop3.matrix[2,2] - 1)][:, None], 
                                U_mask[:, int(cop3.matrix[0,2] - 1)][:, None]
                            ])
                        )[:, None]  #5,4
                    ])
                )[:, None], # 2;5,4
            ])
        )[:, None] # 3;2,5,4
    ])
)
# Second step: invert pair copula (d-3, 0) at inv1.
inv2 = cop3.get_pair_copula(d-3, 0).hinv2(
    np.hstack([
        inv1[:, None],
        cop3.get_pair_copula(1, 2).hfunc1( # note: sometimes use hfunc1
            np.hstack([
                cop3.get_pair_copula(0, 2).hfunc2(
                    np.hstack([
                        U_mask[:, int(cop3.matrix[2,2] - 1)][:, None], 
                        U_mask[:, int(cop3.matrix[0,2] - 1)][:, None]
                    ]))[:, None], # 5;4
                cop3.get_pair_copula(0, 3).hfunc2(
                    np.hstack([
                        U_mask[:, int(cop3.matrix[1,3] - 1)][:, None], 
                        U_mask[:, int(cop3.matrix[0,3] - 1)][:, None]
                    ]))[:, None], # 2;4
            ])
        )[:, None] # 2;5,4
    ])
)
# Third step: invert pair copula (d-4, 0) at inv2.
inv3 = cop3.get_pair_copula(d-4,0).hinv2(
    np.hstack([
        inv2[:, None],
        cop3.get_pair_copula(0, 2).hfunc2(
            np.hstack([
                U_mask[:, int(cop3.matrix[2,2] - 1)][:, None], 
                U_mask[:, int(cop3.matrix[0,2] - 1)][:, None]
            ]))[:, None] # 5;4
    ])
)
# Last step: invert pair copula (0, 0) at inv3, given the raw U_mask column of
# the variable stored at cop3.matrix[0, 0]. inv4 is the end of the chain --
# presumably the imputed draws for variable varm; it is not displayed or used
# further in this cell.
inv4 = cop3.get_pair_copula(d-5,0).hinv2( # note: d-5 == 0
    np.hstack([
        inv3[:, None],
        U_mask[:, int(cop3.matrix[0,0]-1)][:, None] # 4
    ])
)