In [1]:
from copy import deepcopy
import numpy as np
import pandas as pd
import pyvinecopulib as pv

In [2]:
# Simulation settings.
n = 1000  # number of observations to simulate
d = 4     # dimension of the reference vine
p = 0.2   # not referenced by any of the visible cells

# Fixed seeds so that Restart & Run All reproduces the same structure,
# the same pair-copula parameters and the same sample.
SEED = 1234
rng = np.random.default_rng(SEED)

# Random R-vine structure on d variables.
structure = pv.RVineStructure.simulate(d, seeds=[SEED])

# Triangular array of Gaussian pair copulas: row `tree` holds d - tree - 1
# copulas. Each correlation is a Beta(1, 0.75) draw clipped to [0.01, 0.99].
pair_copulas = []
for tree in range(d - 1):
    edges = []
    for _ in range(d - tree - 1):
        rho = np.clip(rng.beta(1, 0.75), 0.01, 0.99)
        edges.append(pv.Bicop(family=pv.BicopFamily.gaussian, parameters=[[rho]]))
    pair_copulas.append(edges)

cop1 = pv.Vinecop(structure, pair_copulas)

U = cop1.simulate(n=n, seeds=list(1 + np.arange(d)))

### Refit on a subset of the variables

In [3]:
# Relabelling between the 1-based columns of U ("old") and the variable
# labels used by the refitted / expanded vine ("new").
# Only one direction is written by hand; the inverse is derived from it so
# the two dictionaries cannot drift apart.
new_to_old_map = {1: 2, 2: 3, 3: 4, 4: 1}
old_to_new_map = dict(sorted((old, new) for new, old in new_to_old_map.items()))
assert len(old_to_new_map) == len(new_to_old_map), "new_to_old_map must be one-to-one"

In [4]:
# Refit a Gaussian-only vine on the variables that carry the new labels
# 1 .. (number of labels - 1). New label k lives in column
# new_to_old_map[k] - 1 of U (labels are 1-based, columns 0-based); the
# variable with the largest new label is left out of this fit.
vcop_controls = pv.FitControlsVinecop(family_set=[pv.BicopFamily.gaussian])
subset_columns = [new_to_old_map[k] - 1 for k in range(1, len(new_to_old_map))]
cop2 = pv.Vinecop(data=U[:, subset_columns], controls=vcop_controls)

### Expand the refitted vine with the left-out variable

In [5]:
def make_triangular_array(d):
    """Return an empty triangular container of placeholders.

    Parameters
    ----------
    d : int
        Size parameter; must be at least 1. In this notebook it is the
        dimension of the vine whose pair copulas will fill the container.

    Returns
    -------
    list of list
        ``d - 1`` rows, where row ``j`` is an independent list holding
        ``d - j - 1`` ``None`` placeholders.

    Raises
    ------
    ValueError
        If ``d`` is smaller than 1.
    """
    if d < 1:
        raise ValueError(f"d must be at least 1, got {d}")
    # Plain nested lists: no need to go through numpy object arrays.
    return [[None] * (d - j - 1) for j in range(d - 1)]

def vfunc(fun, X1, X2, transpose=True):
    """Stack two inputs and evaluate ``fun`` on the stacked array.

    ``X1`` and ``X2`` are converted to arrays and stacked vertically with
    ``np.vstack``. When ``transpose`` is true (the default) the stacked array
    is transposed before it is handed to ``fun``; otherwise it is passed
    unchanged. The return value is whatever ``fun`` returns.
    """
    stacked = np.vstack((np.array(X1), np.array(X2)))
    payload = stacked.T if transpose else stacked
    return fun(payload)

def get(X, i):
    """Return the column of the 2-D array ``X`` addressed by the 1-based label ``i``."""
    column = int(i - 1)  # shift the 1-based label to a 0-based column index
    return X[:, column]

def find(D, a_str):
    """Locate the single entry of ``D`` that equals ``a_str``.

    Returns the index of that entry as a tuple when exactly one entry
    matches, and ``None`` when there is no match or more than one.
    """
    hits = np.argwhere(D == a_str)
    if hits.shape[0] != 1:
        return None
    return tuple(hits[0])

In [6]:
# args
cop = cop2
d = len(cop2.order) + 1

In [7]:
# Copy everything the fitted vine `cop` already provides into the bookkeeping
# of the expanded vine. Each (d, d) object grid is written at [i, j + 1],
# where (i, j) are the arguments passed to cop.get_pair_copula; column 0 is
# not written in this cell.
#   CS  : conditioning set as a comma-separated string ('' when empty)
#   HF1 : hfunc2 of the pair copula evaluated on (arg1, arg2)
#   HF2 : hfunc1 of the pair copula evaluated on (arg1, arg2)
#   CC1 : label 'var1|<var2 and conditioning set>' attached to HF1
#   CC2 : label 'var2|<var1 and conditioning set>' attached to HF2
T = cop.matrix
HF1 = np.empty(shape=(d, d), dtype=object)
HF2 = np.empty(shape=(d, d), dtype=object)
CS = np.empty(shape=(d, d), dtype=object)
CC1 = np.empty(shape=(d, d), dtype=object)
CC2 = np.empty(shape=(d, d), dtype=object)
pair_copulas = make_triangular_array(d)  # sized from d instead of a literal 4

# Columns are visited right to left and rows top to bottom; entries with a
# non-empty conditioning set look up h-function values stored by earlier
# iterations, so this order must be kept.
for j in range(d - 2)[::-1]:
    for i in range(d - j - 2):
        bicop = cop.get_pair_copula(i, j)
        pair_copulas[i][j + 1] = bicop
        var1 = cop.order[j]
        var2 = T[i, j]
        CS[i, j + 1] = ','.join(map(str, sorted(T[:i, j])))
        if CS[i, j + 1] == '':
            # Empty conditioning set (i == 0): the inputs are raw columns of U.
            arg1 = get(U, new_to_old_map[var1])
            arg2 = get(U, new_to_old_map[var2])
            label1 = f'{var1}|{var2}'
            label2 = f'{var2}|{var1}'
        else:
            # Otherwise the inputs are previously stored h-function values
            # whose labels are 'var1|CS' and 'var2|CS'; CC1/HF1 is searched
            # before CC2/HF2 and the first hit wins.
            arg1, arg2 = None, None
            key1 = f'{var1}|{CS[i, j + 1]}'
            key2 = f'{var2}|{CS[i, j + 1]}'
            for CC, HF in zip([CC1, CC2], [HF1, HF2]):
                coord = find(CC, key1)
                if (arg1 is None) and (coord is not None):
                    arg1 = HF[coord]
                coord = find(CC, key2)
                if (arg2 is None) and (coord is not None):
                    arg2 = HF[coord]
            assert (arg1 is not None) and (arg2 is not None), \
                f'missing conditional input for {key1!r} or {key2!r}'
            # Sort the label members numerically, like CS above, so that
            # labels and lookup keys still agree once labels reach two digits.
            conditioning = CS[i, j + 1].split(',')
            label1 = f'{var1}|' + ','.join(sorted(conditioning + [str(var2)], key=int))
            label2 = f'{var2}|' + ','.join(sorted(conditioning + [str(var1)], key=int))
        HF1[i, j + 1] = vfunc(bicop.hfunc2, arg1, arg2)
        HF2[i, j + 1] = vfunc(bicop.hfunc1, arg1, arg2)
        CC1[i, j + 1] = label1
        CC2[i, j + 1] = label2

In [8]:
# Fit the pair copulas that attach the new variable (label d) to the vine.
# They occupy column 0 of the bookkeeping grids and of `pair_copulas`.
bcop_controls = pv.FitControlsBicop(family_set=[pv.BicopFamily.gaussian])

# Structure matrix of the expanded vine: the fitted vine's matrix fills the
# block of rows 0..d-2 and columns 1..d-1, label d goes into the bottom-left
# corner, and the rest of column 0 is filled in by the loop below.
T_new = np.zeros(shape=(d, d), dtype=np.uint64)
T_new[d - 1, 0] = d
T_new[:-1, 1:] = cop.matrix

j = 0
for i in range(d - 1):
    var1 = d
    var2 = cop.order[d - i - 2]
    T_new[i, j] = var2
    CS[i, j] = ','.join(map(str, sorted(T_new[:i, j])))
    if CS[i, j] == '':
        # Empty conditioning set (i == 0): the inputs are raw columns of U.
        arg1 = get(U, new_to_old_map[var1])
        arg2 = get(U, new_to_old_map[var2])
        label1 = f'{var1}|{var2}'
        label2 = f'{var2}|{var1}'
    else:
        # Otherwise the inputs are previously stored h-function values whose
        # labels are 'var1|CS' and 'var2|CS'; CC1/HF1 is searched before
        # CC2/HF2 and the first hit wins.
        arg1, arg2 = None, None
        key1 = f'{var1}|{CS[i, j]}'
        key2 = f'{var2}|{CS[i, j]}'
        for CC, HF in zip([CC1, CC2], [HF1, HF2]):
            coord = find(CC, key1)
            if (arg1 is None) and (coord is not None):
                arg1 = HF[coord]
            coord = find(CC, key2)
            if (arg2 is None) and (coord is not None):
                arg2 = HF[coord]
        assert (arg1 is not None) and (arg2 is not None), \
            f'missing conditional input for {key1!r} or {key2!r}'
        # Sort the label members numerically, like CS above, so that labels
        # and lookup keys still agree once labels reach two digits.
        conditioning = CS[i, j].split(',')
        label1 = f'{var1}|' + ','.join(sorted(conditioning + [str(var2)], key=int))
        label2 = f'{var2}|' + ','.join(sorted(conditioning + [str(var1)], key=int))
    # Fit a Gaussian pair copula on the two inputs, then store it together
    # with its h-function values and their labels.
    bcop = pv.Bicop(data=np.vstack([arg1, arg2]).T, controls=bcop_controls)
    pair_copulas[i][j] = bcop
    HF1[i, j] = vfunc(bcop.hfunc2, arg1, arg2)
    HF2[i, j] = vfunc(bcop.hfunc1, arg1, arg2)
    CC1[i, j] = label1
    CC2[i, j] = label2

In [16]:
# Assemble the expanded vine from the structure matrix T_new and the
# triangular pair-copula array, then display it.
cop3 = pv.Vinecop(T_new, pair_copulas)
cop3

<pyvinecopulib.Vinecop>
** Tree: 0
4,3 <-> Gaussian, parameters = -0.0696267
2,1 <-> Gaussian, parameters = 0.440436
1,3 <-> Gaussian, parameters = 0.611812
** Tree: 1
4,1 | 3 <-> Gaussian, parameters = 0.283047
2,3 | 1 <-> Gaussian, parameters = -0.268039
** Tree: 2
4,2 | 1,3 <-> Gaussian, parameters = 0.882852

In [17]:
# Display the vine that generated U. NOTE(review): cop3 was built with the
# "new" variable labels, so its edges are not label-for-label comparable with
# this output without going through old_to_new_map.
cop1

<pyvinecopulib.Vinecop>
** Tree: 0
1,2 <-> Gaussian, parameters = 0.223508
4,3 <-> Gaussian, parameters = 0.10715
2,3 <-> Gaussian, parameters = 0.450914
** Tree: 1
1,3 | 2 <-> Gaussian, parameters = 0.889389
4,2 | 3 <-> Gaussian, parameters = 0.610502
** Tree: 2
1,4 | 3,2 <-> Gaussian, parameters = 0.0242227

In [19]:
# Draw a sample from the expanded vine with the same size as U (n instead of
# a literal 1000) and with fixed seeds so the comparison below is repeatable.
# The seeds differ from the ones used for U.
U2 = cop3.simulate(n=n, seeds=list(101 + np.arange(d)))

In [29]:
# Correlation matrix (despite the `cov` name) of the sample from the expanded
# vine. Its columns are reordered from the new labels back to the original
# column order of U; the number of columns comes from d, not a literal 4.
cov2 = np.corrcoef(U2[:, [old_to_new_map[old] - 1 for old in range(1, d + 1)]].T)

In [30]:
# Correlation matrix (despite the `cov` name) of the original sample U,
# treating each column as one variable.
cov1 = np.corrcoef(U, rowvar=False)

In [33]:
# Correlation matrix of the original sample U.
cov1

array([[ 1.        ,  0.16758855,  0.859866  , -0.07367281],
       [ 0.16758855,  1.        ,  0.40051392,  0.57403568],
       [ 0.859866  ,  0.40051392,  1.        ,  0.04299718],
       [-0.07367281,  0.57403568,  0.04299718,  1.        ]])

In [34]:
# Correlation matrix of the sample drawn from the expanded vine cop3,
# with columns in the original order of U.
cov2

array([[ 1.        ,  0.17243921,  0.8487276 , -0.03170326],
       [ 0.17243921,  1.        ,  0.45477682,  0.60733636],
       [ 0.8487276 ,  0.45477682,  1.        ,  0.11783834],
       [-0.03170326,  0.60733636,  0.11783834,  1.        ]])

In [37]:
# Frobenius norm of the difference between the two correlation matrices;
# smaller means the expanded vine reproduces the original dependence better.
np.linalg.norm(cov1-cov2, ord='fro')

0.15207650591486335