In [1]:
from IPython.display import display
from copy import deepcopy
import numpy as np
import pandas as pd
import pyvinecopulib as pv

def make_triangular_array(d):
    """
    Build an empty triangular container as a list of lists.

    Parameters
    ----------
    d : int
        Size parameter; must be at least 1.

    Returns
    -------
    list of list
        ``d - 1`` independent inner lists; the inner list at index ``j`` has
        ``d - j - 1`` slots, each initialised to ``None``.

    Raises
    ------
    ValueError
        If ``d`` is smaller than 1.
    """
    if d < 1:
        raise ValueError(f"d must be at least 1, got {d}")
    # Plain comprehensions give the same None-filled nested list without the
    # detour through a numpy object array; every inner list is a fresh object.
    return [[None for _ in range(d - j - 1)] for j in range(d - 1)]

In [2]:
n = 1000
d = 4
p = 0.2  # not used in the cells shown here
SEED = 0

# NOTE(review): the structure is still drawn without a fixed seed, so the
# matrix can differ between runs; the call is left as it was because the
# outputs displayed further down depend on the structure that was drawn.
structure = pv.RVineStructure.simulate(d)

# Seeded generator so the Gaussian parameters are reproducible, in line with
# the fixed seeds already passed to cop.simulate below.
param_rng = np.random.default_rng(SEED)

pair_copulas = []
for j in range(d - 1):
    # Entry j of pair_copulas receives d - j - 1 Gaussian pair copulas whose
    # parameter is a Beta(1, 0.75) draw clipped to [0.01, 0.99].
    rhos = np.clip(param_rng.beta(1, 0.75, size=d - j - 1), 0.01, 0.99)
    pair_copulas.append(
        [pv.Bicop(family=pv.BicopFamily.gaussian, parameters=[[rho]]) for rho in rhos]
    )

cop = pv.Vinecop(structure, pair_copulas)

U = cop.simulate(n=n, seeds=list(1 + np.arange(d)))

In [3]:
# Matrix exposed by the vine copula object.
T = cop.matrix

# Read the pair copulas back from `cop` into a triangular list of lists:
# the inner list at index `tree` has d - tree - 1 slots, one per `edge`.
pair_copulas = make_triangular_array(d)
for tree, edges in enumerate(pair_copulas):
    for edge in range(len(edges)):
        edges[edge] = cop.get_pair_copula(tree, edge)

In [4]:
def downsize_copula(T_whole, pair_copulas_in, T_tmp):
    """
    Compact a matrix with zeroed-out entries into a smaller triangular one.

    Parameters
    ----------
    T_whole : np.ndarray
        The unmodified square matrix.
    pair_copulas_in : list of list
        Pair copulas indexed as ``pair_copulas_in[row][col]`` with the row and
        column of the corresponding entry of ``T_whole``.
    T_tmp : np.ndarray
        Copy of ``T_whole`` in which the entries to drop were set to 0.

    Returns
    -------
    (T_out, pair_copulas_out)
        If ``T_tmp`` equals ``T_whole`` the inputs ``T_whole`` and
        ``pair_copulas_in`` are returned unchanged. Otherwise ``T_out`` is a
        ``d2 x d2`` uint64 matrix holding the surviving entries of ``T_tmp``
        packed column by column (``d2 - j2`` entries in output column ``j2``),
        and ``pair_copulas_out`` is the matching triangular list of lists.

    Raises
    ------
    AssertionError
        If the number of non-zero entries of ``T_tmp`` is not
        ``d2 * (d2 + 1) // 2``. Only the total is checked, not the
        per-column counts.
    """
    # Compare against the argument rather than a notebook-level variable so the
    # function does not depend on hidden kernel state.
    if np.array_equal(T_whole, T_tmp):
        return T_whole, pair_copulas_in

    # The fullest surviving column determines the size of the output matrix.
    d2 = int(np.amax(np.count_nonzero(T_tmp, axis=0)))

    # Non-zero positions ordered by column, then by row within a column.
    # np.nonzero scans the transposed array in row-major order, which gives
    # this order deterministically (no dependence on the stability of a sort).
    j_tmp_lst, i_tmp_lst = np.nonzero(T_tmp.T)

    # The fill loop below consumes d2 entries for the first output column,
    # d2 - 1 for the next, and so on; that is d2 * (d2 + 1) // 2 in total,
    # regardless of how many variables were removed.
    assert len(j_tmp_lst) == len(i_tmp_lst) == d2*(d2+1)//2

    T_out = np.zeros(shape=(d2,d2), dtype=np.uint64)
    pair_copulas_out = make_triangular_array(d2)
    i2, j2 = 0, 0
    for i_tmp, j_tmp in zip(i_tmp_lst, j_tmp_lst):
        # Output column j2 is full once i2 has passed its last row d2-j2-1.
        if i2 > d2-j2-1:
            j2 += 1
            i2 = 0
        T_out[i2, j2] = T_tmp[i_tmp, j_tmp]
        # The last entry of each output column has no pair copula attached.
        if i2 != d2-j2-1:
            pair_copulas_out[i2][j2] = pair_copulas_in[i_tmp][j_tmp]
        i2 += 1
    return T_out, pair_copulas_out

In [5]:
def remove_column(T_in, pair_copulas_in, var_mis, j):
    """
    Drop column j when var_mis appears above its diagonal entry.

    If var_mis is found in column j above the diagonal entry (row d-j-1),
    the whole column is zeroed, every occurrence of that column's diagonal
    variable is zeroed as well, and the result is handed to downsize_copula.
    Otherwise the inputs are returned untouched.
    """
    diag_row = T_in.shape[0] - j - 1

    # Guard clause: nothing to remove when var_mis is not above the diagonal.
    if var_mis not in T_in[:diag_row, j]:
        return T_in, pair_copulas_in

    diag_var = T_in[diag_row, j]
    T_cleared = deepcopy(T_in)  # work on a copy so T_in stays intact
    T_cleared[:, j] = 0
    T_cleared = np.where(T_cleared == diag_var, 0, T_cleared)
    return downsize_copula(T_in, pair_copulas_in, T_cleared)

In [22]:
def remove_inbetween(T_in, pair_copulas_in, var_mis, j):
    """
    Remove every variable lying between var_mis and the diagonal in column j.

    The variables stored in column j strictly between the row of var_mis and
    the diagonal entry (row d-j-1) are identified. For each of them the column
    that has it on the diagonal is zeroed and all of its occurrences are
    zeroed; the result is then handed to downsize_copula.

    Parameters
    ----------
    T_in : np.ndarray
        Square matrix; the diagonal entry of column ``c`` is read from row
        ``d - c - 1``.
    pair_copulas_in : list of list
        Pair copulas belonging to ``T_in``; passed through to downsize_copula.
    var_mis : int
        Variable to look for in column ``j``.
    j : int
        Column index.

    Returns
    -------
    (T, pair_copulas)
        The inputs unchanged when var_mis is not found at least two rows above
        the diagonal of column j (nothing lies in between); otherwise whatever
        downsize_copula returns.

    Raises
    ------
    ValueError
        If var_mis occurs more than once above the diagonal of column j, or if
        a variable to delete is not on the diagonal of any column.
    """
    d = T_in.shape[0]
    diag_row = d - j - 1
    # Only rows at least two above the diagonal leave something "in between".
    # Clamp at 0: for the last column diag_row - 1 is negative, and a negative
    # slice stop would wrap around and include the diagonal entry itself.
    search_stop = max(diag_row - 1, 0)
    if var_mis not in T_in[:search_stop, j]:
        return T_in, pair_copulas_in

    # Everything below is read from T_in, not from a notebook-level variable.
    k = np.flatnonzero(T_in[:diag_row, j] == var_mis).item()
    vars_between = T_in[(k+1):diag_row, j].copy()

    T_tmp = T_in.copy()
    T_tmp[(k+1):diag_row, j] = 0
    # order[c] is the variable on the diagonal of column c.
    order = [T_in[d-col-1, col] for col in range(d)]
    for var_del in vars_between:
        T_tmp[:, order.index(var_del)] = 0
        T_tmp = np.where(T_tmp == var_del, 0, T_tmp)
    return downsize_copula(T_in, pair_copulas_in, T_tmp)

In [21]:
T

array([[4, 4, 4, 4],
       [1, 1, 1, 0],
       [3, 3, 0, 0],
       [2, 0, 0, 0]], dtype=uint64)

In [23]:
# Demo on column 0 with var_mis=1: for the T displayed above, 3 is the only
# variable between 1 and the diagonal entry 2, so it is the one removed.
T_tmp, pair_copulas_out = remove_inbetween(T, pair_copulas, var_mis=1, j=0)

In [24]:
T_tmp

array([[4, 4, 4],
       [1, 1, 0],
       [2, 0, 0]], dtype=uint64)