In [None]:
from numpy.random import rand, multivariate_normal
from numpy import arange, zeros, dot, ones
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from ite.cost.x_factory import co_factory
from ite.cost.x_analytical_values import analytical_value_d_tsallis
from collections import defaultdict
import pandas as pd
import seaborn as sns

In [None]:
def generate_normal_4(d, n_samples, fac):
    """Draw samples from a zero-mean 4-D Gaussian with a chosen covariance layout.

    Parameters
    ----------
    d : float
        Off-diagonal covariance placed between the variables that are coupled
        under the selected layout.
    n_samples : int
        Number of rows to draw.
    fac : int
        Covariance layout selector:
        22 -> two coupled pairs (columns 0-1 and columns 2-3),
        13 -> column 0 uncoupled, columns 1-3 mutually coupled,
        4  -> all four columns mutually coupled.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, 4)``.

    Raises
    ------
    ValueError
        If ``fac`` is not one of 22, 13 or 4.
    """
    if fac == 22:
        cov = [[1, d, 0, 0],
               [d, 1, 0, 0],
               [0, 0, 1, d],
               [0, 0, d, 1]]
    elif fac == 13:
        cov = [[1, 0, 0, 0],
               [0, 1, d, d],
               [0, d, 1, d],
               [0, d, d, 1]]
    elif fac == 4:
        cov = [[1, d, d, d],
               [d, 1, d, d],
               [d, d, 1, d],
               [d, d, d, 1]]
    else:
        # Without this guard an unknown selector would leave `cov` unbound and
        # fail later with an unhelpful UnboundLocalError.
        raise ValueError(f"unsupported fac {fac!r}; expected 22, 13 or 4")

    mean = np.zeros(4)
    return np.random.multivariate_normal(mean, cov, n_samples)

In [None]:
def generate_normal_4_check(d, n_samples, fac):
    """Draw noisy samples from a zero-mean 4-D Gaussian with a chosen covariance layout.

    The covariance matrix is selected by the string label ``fac``; entries equal
    to 1 off the diagonal tie variables together, entries equal to ``d`` couple
    them with strength ``d``. Independent Gaussian noise with standard deviation
    0.1 is added to every entry of the drawn sample.

    Parameters
    ----------
    d : float
        Covariance value used for the ``d`` entries of the selected layout.
    n_samples : int
        Number of rows to draw.
    fac : str
        One of ``'2e'``, ``'2s1e'``, ``'4s'`` or ``'1s1t'``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, 4)``.

    Raises
    ------
    ValueError
        If ``fac`` is not one of the supported labels.
    """
    if fac == '2e':
        cov = [[1, 1, d, d],
               [1, 1, d, d],
               [d, d, 1, 1],
               [d, d, 1, 1]]
    elif fac == '2s1e':
        cov = [[1, 1, d, d],
               [1, 1, d, d],
               [d, d, 1, d],
               [d, d, d, 1]]
    elif fac == '4s':
        cov = [[1, d, d, d],
               [d, 1, d, d],
               [d, d, 1, d],
               [d, d, d, 1]]
    elif fac == '1s1t':
        cov = [[1, 1, 1, d],
               [1, 1, 1, d],
               [1, 1, 1, d],
               [d, d, d, 1]]
    else:
        # Without this guard an unknown label would leave `cov` unbound and
        # fail later with an unhelpful UnboundLocalError.
        raise ValueError(
            f"unsupported fac {fac!r}; expected '2e', '2s1e', '4s' or '1s1t'"
        )

    mean = np.zeros(4)
    mvn_data = np.random.multivariate_normal(mean, cov, n_samples)

    # Additive measurement-style noise, drawn after the Gaussian sample.
    noise = np.random.normal(0, 0.1, (n_samples, 4))

    return mvn_data + noise

In [None]:
def shuffle_data(data, shuffle_indices):
    """Return a copy of ``data`` whose column groups have been row-permuted.

    ``shuffle_indices`` is an iterable of groups, each group being an iterable
    of column indices. One random row permutation is drawn per group (with
    ``np.random.permutation``) and applied to every column of that group, so
    the columns inside a group stay aligned with each other. ``data`` itself is
    not modified.
    """
    result = data.copy()

    for group in shuffle_indices:
        # One permutation per group keeps the group's columns in step.
        row_order = np.random.permutation(result.shape[0])

        for column in group:
            # The fancy-indexed right-hand side is a fresh array, so the column
            # is read in full before being overwritten.
            result[:, column] = result[row_order, column]

    return result

In [None]:
def Streitberg_4(X, div_func):
    """Combine divergence estimates over partitions of four variables.

    Every term is ``div_func(sample, reference)`` where the reference is built
    from ``X`` by permuting columns 0, 1 and 2 independently of each other
    (column 3 keeps its order). The terms are combined as

        p1234 - sum(1|3 splits) - sum(2|2 splits) + 2 * sum(1|1|2 splits)

    No term is computed for the fully split case; presumably its divergence
    from the reference is taken to be zero -- confirm against the intended
    definition.

    Parameters
    ----------
    X : array-like supporting ``X[:, cols]`` and ``.copy()``
        Sample whose columns 0-3 are the four variables.
    div_func : callable
        ``div_func(sample_a, sample_b)`` returning a value that supports
        addition, subtraction and multiplication by an int.

    Returns
    -------
    The combined value described above.
    """
    # NOTE: the reference is derived from the same rows as X, so the two
    # samples handed to div_func are not independent draws.
    X_fully_shuffled = shuffle_data(X, [[0], [1], [2]])

    # All four variables jointly.
    p1234 = div_func(X, X_fully_shuffled)

    # 1|3 splits: the singled-out column is dropped from both samples.
    p1p234 = div_func(X[:, [1, 2, 3]], X_fully_shuffled[:, [1, 2, 3]])
    p2p134 = div_func(X[:, [0, 2, 3]], X_fully_shuffled[:, [0, 2, 3]])
    p3p124 = div_func(X[:, [0, 1, 3]], X_fully_shuffled[:, [0, 1, 3]])
    p4p123 = div_func(X[:, [0, 1, 2]], X_fully_shuffled[:, [0, 1, 2]])

    # 2|2 splits: one pair of columns is permuted jointly, which keeps each
    # pair intact while breaking the link between the two pairs.
    p12p34 = div_func(shuffle_data(X, [[0, 1]]), X_fully_shuffled)
    p13p24 = div_func(shuffle_data(X, [[0, 2]]), X_fully_shuffled)
    p14p23 = div_func(shuffle_data(X, [[0, 3]]), X_fully_shuffled)

    # 1|1|2 splits: only the remaining pair of columns is compared.
    p1p2p34 = div_func(X[:, [2, 3]], X_fully_shuffled[:, [2, 3]])
    p1p3p24 = div_func(X[:, [1, 3]], X_fully_shuffled[:, [1, 3]])
    p1p4p23 = div_func(X[:, [1, 2]], X_fully_shuffled[:, [1, 2]])
    p2p3p14 = div_func(X[:, [0, 3]], X_fully_shuffled[:, [0, 3]])
    p2p4p13 = div_func(X[:, [0, 2]], X_fully_shuffled[:, [0, 2]])
    p3p4p12 = div_func(X[:, [0, 1]], X_fully_shuffled[:, [0, 1]])

    streitberg_4 = (p1234 - (p1p234 + p2p134 + p3p124 + p4p123) - (p12p34 + p13p24 + p14p23)
                    + 2 * (p1p2p34 + p1p3p24 + p1p4p23 + p2p3p14 + p2p4p13 + p3p4p12))

    return streitberg_4

def Lancaster_4(X, div_func):
    """Combine divergence estimates over the 1|3 and 1|1|2 splits of four variables.

    Every term is ``div_func(sample, reference)`` where the reference is built
    from ``X`` by permuting columns 0, 1 and 2 independently of each other
    (column 3 keeps its order). The terms are combined as

        p1234 - sum(1|3 splits) + sum(1|1|2 splits)

    Parameters
    ----------
    X : array-like supporting ``X[:, cols]`` and ``.copy()``
        Sample whose columns 0-3 are the four variables.
    div_func : callable
        ``div_func(sample_a, sample_b)`` returning a value that supports
        addition and subtraction.

    Returns
    -------
    The combined value described above.
    """
    # NOTE: the reference is derived from the same rows as X, so the two
    # samples handed to div_func are not independent draws.
    X_fully_shuffled = shuffle_data(X, [[0], [1], [2]])

    # All four variables jointly.
    p1234 = div_func(X, X_fully_shuffled)

    # 1|3 splits: the singled-out column is dropped from both samples.
    p1p234 = div_func(X[:, [1, 2, 3]], X_fully_shuffled[:, [1, 2, 3]])
    p2p134 = div_func(X[:, [0, 2, 3]], X_fully_shuffled[:, [0, 2, 3]])
    p3p124 = div_func(X[:, [0, 1, 3]], X_fully_shuffled[:, [0, 1, 3]])
    p4p123 = div_func(X[:, [0, 1, 2]], X_fully_shuffled[:, [0, 1, 2]])

    # 1|1|2 splits: only the remaining pair of columns is compared.
    p1p2p34 = div_func(X[:, [2, 3]], X_fully_shuffled[:, [2, 3]])
    p1p3p24 = div_func(X[:, [1, 3]], X_fully_shuffled[:, [1, 3]])
    p1p4p23 = div_func(X[:, [1, 2]], X_fully_shuffled[:, [1, 2]])
    p2p3p14 = div_func(X[:, [0, 3]], X_fully_shuffled[:, [0, 3]])
    p2p4p13 = div_func(X[:, [0, 2]], X_fully_shuffled[:, [0, 2]])
    p3p4p12 = div_func(X[:, [0, 1]], X_fully_shuffled[:, [0, 1]])

    # Named in lower case so the local does not shadow the function itself.
    lancaster_4 = (p1234 - (p1p234 + p2p134 + p3p124 + p4p123)
                   + (p1p2p34 + p1p3p24 + p1p4p23 + p2p3p14 + p2p4p13 + p3p4p12))

    return lancaster_4

In [None]:
def TC4(X, div_func):
    """Return ``div_func`` between ``X`` and a column-wise shuffled copy of it.

    The shuffled copy permutes columns 0, 1 and 2 independently of each other
    (column 3 keeps its order), so it is built from the same rows as ``X``.

    Parameters
    ----------
    X : array-like supporting ``.copy()`` and column assignment
        Sample whose columns 0-3 are the four variables.
    div_func : callable
        ``div_func(sample_a, sample_b)``; its return value is passed through.
    """
    X_fully_shuffled = shuffle_data(X, [[0], [1], [2]])
    return div_func(X, X_fully_shuffled)

# Choice of k (the `k` argument passed to the BDTsallis_KnnK estimator)

In [None]:
# Values of the estimator's k parameter to sweep: 5, 10, ..., 30.
ks = list(range(5, 31, 5))
# Dependence levels 0, 0.05, ..., 0.95. round() snaps each product back to the
# two-decimal float; the leading 0 stays an int as in the literal list.
ds = [0] + [round(0.05 * step, 2) for step in range(1, 20)]

In [None]:
# Sweep k: for every k and every dependence level d, average Streitberg_4 over
# repeated draws of 80 samples from the fac=4 Gaussian.
# NOTE: the n sweep further down rebinds `normal4_dict`, so these results are
# replaced once that cell runs.
cost_name = 'BDTsallis_KnnK'  # dim >= 1
n_repeats = 200
normal4_dict = defaultdict(lambda: defaultdict(int))
for k in tqdm(ks):
    co = co_factory(cost_name, mult=True, alpha=0.5, k=k)  # cost object
    for d in ds:
        running_total = 0
        for _ in range(n_repeats):
            X = generate_normal_4(d=d, n_samples=80, fac=4)
            running_total += Streitberg_4(X, co.estimation)
        normal4_dict[k][d] = running_total / n_repeats

# Choice of n (number of samples per draw)

In [None]:
# Sample sizes to sweep. For a quick run, restrict to ns = [3000] and to the
# upper dependence levels ds = [0.55, 0.6, ..., 0.95].
ns = [
    50,
    80,
    200,
    500,
    1000,
    3000,
]
# Dependence levels 0, 0.05, ..., 0.95. round() snaps each product back to the
# two-decimal float; the leading 0 stays an int as in the literal list.
ds = [0] + [round(0.05 * step, 2) for step in range(1, 20)]

In [None]:
# Sweep the sample size n with k fixed at 30: for every n and every dependence
# level d, average Streitberg_4 over repeated draws from the fac=4 Gaussian.
# NOTE: this rebinds `normal4_dict`, replacing whatever an earlier cell stored
# under that name.
cost_name = 'BDTsallis_KnnK'  # dim >= 1
n_repeats = 200
normal4_dict = defaultdict(lambda: defaultdict(int))
for n in ns:
    co = co_factory(cost_name, mult=True, alpha=0.5, k=30)  # cost object
    for d in tqdm(ds):
        running_total = 0
        for _ in range(n_repeats):
            X = generate_normal_4(d=d, n_samples=n, fac=4)
            running_total += Streitberg_4(X, co.estimation)
        normal4_dict[n][d] = running_total / n_repeats

# XOR Monotonic

In [None]:
# Estimator object used by the XOR sweep below (alpha = 0.5, k = 30).
cost_name = 'BDTsallis_KnnK'  # dim >= 1
co = co_factory(
    cost_name,
    mult=True,
    alpha=0.5,
    k=30,
)

In [None]:
def generate_xor_4way(length, n_sample, noise_sample):
    """Generate four uniform columns where the fourth is a wrapped sum of the others.

    All four columns are first drawn independently from ``uniform(0, length)``.
    From row ``noise_sample`` onwards the fourth column is replaced by
    ``(x1 + x2 + x3) % length``; the first ``noise_sample`` rows stay
    independent noise.

    The modulus is ``length`` (it used to be the literal 3, which only matched
    the sampling range when ``length == 3``), so the replaced values stay
    inside ``[0, length)`` for any positive ``length``.

    Parameters
    ----------
    length : float
        Upper bound of the uniform range and modulus of the wrapped sum.
    n_sample : int
        Number of rows.
    noise_sample : int
        Number of leading rows left fully independent.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_sample, 4)``.
    """
    # Draw column by column so the random stream is consumed in a fixed order.
    x1 = np.random.uniform(0, length, n_sample).reshape(-1, 1)
    x2 = np.random.uniform(0, length, n_sample).reshape(-1, 1)
    x3 = np.random.uniform(0, length, n_sample).reshape(-1, 1)
    x4 = np.random.uniform(0, length, n_sample).reshape(-1, 1)

    dependent = slice(noise_sample, None)
    x4[dependent] = (x1[dependent] + x2[dependent] + x3[dependent]) % length

    return np.hstack((x1, x2, x3, x4))

In [None]:
# XOR sweep: d is the fraction of the 1000 rows whose fourth column is tied to
# the other three; the remaining leading rows stay independent noise.
# For a quick run, restrict ds to [0.95, 1].
xor_dict = defaultdict(int)
# Levels 0, 0.05, ..., 0.95, 1 (end points kept as ints, as in the literal list).
ds = [0] + [round(0.05 * step, 2) for step in range(1, 20)] + [1]
n_repeats = 100
for d in tqdm(ds):
    running_total = 0
    for _ in range(n_repeats):
        X = generate_xor_4way(3, 1000, int(1000-1000*d))
        running_total += Streitberg_4(X, co.estimation)
    xor_dict[d] = running_total / n_repeats

# COPY Monotonic

In [None]:
def generate_copy_4way(length, n_sample, noise_sample):
    """Generate four uniform columns that share one common value on the tail rows.

    A shared column and four data columns are drawn independently from
    ``uniform(0, length)``. From row ``noise_sample`` onwards every data column
    is overwritten with the shared column; the first ``noise_sample`` rows stay
    independent. Returns an array of shape ``(n_sample, 4)``.
    """
    def draw_column():
        return np.random.uniform(0, length, n_sample).reshape(-1, 1)

    # Draw order matters for the random stream: shared column first, then x1..x4.
    shared = draw_column()
    columns = [draw_column() for _ in range(4)]

    for column in columns:
        column[noise_sample:] = shared[noise_sample:]

    return np.hstack(columns)

In [None]:
# Estimator object used by the COPY sweep below (alpha = 0.5, k = 30).
cost_name = 'BDTsallis_KnnK'  # dim >= 1
co = co_factory(
    cost_name,
    mult=True,
    alpha=0.5,
    k=30,
)

In [None]:
# COPY sweep: d is the fraction of the 1000 rows in which all four columns
# carry the same value; the remaining leading rows stay independent noise.
# For a quick run, restrict ds to [0.95, 1].
copy_dict = defaultdict(int)
# Levels 0, 0.05, ..., 0.95, 1 (end points kept as ints, as in the literal list).
ds = [0] + [round(0.05 * step, 2) for step in range(1, 20)] + [1]
n_repeats = 200
for d in tqdm(ds):
    running_total = 0
    for _ in range(n_repeats):
        X = generate_copy_4way(3, 1000, int(1000-1000*d))
        running_total += Streitberg_4(X, co.estimation)
    copy_dict[d] = running_total / n_repeats