In [12]:
import numpy as np

In [13]:
# --- Configuration ---------------------------------------------------------
# Shape of the block inverse-covariance matrix: `window_size` blocks per axis,
# each block being `number_of_sensors` x `number_of_sensors`.
window_size = 5
number_of_sensors = 5

# Probability that an entry of a generated block is non-zero.
sparsity_inv_matrix = 0.2

# Number of distinct clusters, and which cluster generates each data segment.
number_of_clusters = 3
cluster_ids = [0, 1, 0]

# Exclusive end index of each segment: three segments of 200 rows each.
break_points = 200 * np.arange(1, 4)

out_file_name = "synthetic_data.csv"

# Seed NumPy's global generator.
rand_seed = 10
np.random.seed(rand_seed)

In [14]:
# Short aliases for the configuration values, plus the dict that
# generate_inverse fills with one block per block index.
seg_ids = cluster_ids            # cluster index for each data segment
size_blocks = number_of_sensors  # side length of a single block
num_blocks = window_size         # number of blocks along each axis
block_matrices = dict()

In [15]:
def _random_sparse_block(rng, size, low=0.3, upper=0.6, portion=0.2, symmetric=False):
    """Draw one sparse ``size`` x ``size`` block with random signed entries.

    Each entry is non-zero with probability ``portion`` (``portion / 2`` when
    ``symmetric`` is true, because the block is then added to its transpose).
    Non-zero magnitudes are uniform in ``[low, upper)`` with a random sign.

    Args:
        rng: ``np.random.RandomState`` that supplies all random draws.
        size: Number of rows/columns of the block.
        low: Lower bound of the magnitude of a non-zero entry.
        upper: Upper bound of the magnitude of a non-zero entry.
        portion: Target fraction of non-zero entries.
        symmetric: If true, return ``S + S.T`` instead of ``S``.

    Returns:
        A ``(size, size)`` ``np.ndarray``.
    """
    if symmetric:
        portion = portion / 2
    block = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            if rng.rand() < portion:
                # Draw order (sign first, then magnitude) is kept fixed so a
                # given seed always reproduces the same block.
                sign = (rng.randint(2) - 0.5) * 2
                block[i, j] = sign * (low + (upper - low) * rng.rand())
    return block + block.T if symmetric else block


def generate_inverse(rand_seed, n_blocks=None, block_size=None, sparsity=None, blocks_out=None):
    """Build a sparse, symmetric, positive-definite block-Toeplitz matrix.

    One random block is drawn per lag ``0 .. n_blocks - 1`` (the lag-0 block is
    symmetric). Block ``(i, j)`` of the result is the lag ``|i - j|`` block
    below the diagonal and its transpose on or above it, which makes the whole
    matrix symmetric. The diagonal is then shifted by ``0.1 + |lambda_min|`` so
    every eigenvalue is at least about 0.1.

    Randomness comes from a private ``np.random.RandomState(rand_seed)``. This
    yields the same draws as seeding the global generator with ``rand_seed``
    but leaves the global generator's state untouched, so a seed set elsewhere
    in the notebook keeps governing later sampling.

    Args:
        rand_seed: Seed for this matrix's private random generator.
        n_blocks: Blocks per axis; defaults to the notebook-level ``num_blocks``.
        block_size: Side length of one block; defaults to the notebook-level
            ``size_blocks``.
        sparsity: Non-zero probability for block entries; defaults to the
            notebook-level ``sparsity_inv_matrix``.
        blocks_out: Dict that receives the generated blocks keyed by lag;
            defaults to the notebook-level ``block_matrices``.

    Returns:
        A ``(n_blocks * block_size, n_blocks * block_size)`` ``np.ndarray``.

    Raises:
        ValueError: If ``n_blocks`` or ``block_size`` is smaller than 1.
    """
    # Fall back to the notebook globals only for arguments not given explicitly.
    n_blocks = num_blocks if n_blocks is None else n_blocks
    block_size = size_blocks if block_size is None else block_size
    sparsity = sparsity_inv_matrix if sparsity is None else sparsity
    blocks_out = block_matrices if blocks_out is None else blocks_out

    if n_blocks < 1 or block_size < 1:
        raise ValueError("n_blocks and block_size must both be at least 1")

    rng = np.random.RandomState(rand_seed)

    # One block per lag; only the lag-0 (diagonal) block is symmetric.
    for block in range(n_blocks):
        blocks_out[block] = _random_sparse_block(
            rng, size=block_size, portion=sparsity, symmetric=(block == 0)
        )

    dim = n_blocks * block_size
    inv_matrix = np.zeros([dim, dim])

    # Fill the inverse matrix with blocks
    for block_i in range(n_blocks):
        rows = slice(block_i * block_size, (block_i + 1) * block_size)
        for block_j in range(n_blocks):
            cols = slice(block_j * block_size, (block_j + 1) * block_size)
            lag_block = blocks_out[abs(block_i - block_j)]
            inv_matrix[rows, cols] = lag_block if block_i > block_j else lag_block.T

    # Ensure the matrix is positive definite. The matrix is symmetric, so
    # eigvalsh is used: it always returns real eigenvalues.
    lambda_min = np.linalg.eigvalsh(inv_matrix).min()
    inv_matrix = inv_matrix + (0.1 + abs(lambda_min)) * np.identity(dim)

    eigs = np.linalg.eigvalsh(inv_matrix)
    print("modified Eigenvalues are:", np.sort(eigs))

    return inv_matrix


In [16]:
# Per-cluster model: a zero mean, plus one inverse covariance (and the
# covariance obtained by inverting it) for every cluster.
num_clusters = number_of_clusters
cluster_mean = np.zeros((size_blocks, 1))
cluster_mean_stacked = np.zeros((size_blocks * num_blocks, 1))

cluster_inverses, cluster_covariances = {}, {}

for cluster in range(num_clusters):
    # The cluster index doubles as the seed passed to generate_inverse.
    inverse = cluster_inverses[cluster] = generate_inverse(rand_seed=cluster)
    covariance = cluster_covariances[cluster] = np.linalg.inv(inverse)

    # Persist both matrices as CSV with six decimal places.
    np.savetxt(f"inverse_covariance_cluster_{cluster}.csv", inverse, delimiter=",", fmt='%1.6f')
    np.savetxt(f"covariance_cluster_{cluster}.csv", covariance, delimiter=",", fmt='%1.6f')


modified Eigenvalues are: [0.1        0.29030178 0.41337634 0.62079652 0.76699303 0.84087622
 0.9271657  1.06680631 1.1515207  1.50330742 1.64971793 1.68068967
 1.73597429 1.77838288 1.8451119  1.96092392 2.27015193 2.30470308
 2.53735724 2.7503323  2.79848672 3.11894074 3.17406739 3.45667681
 3.66313147]
modified Eigenvalues are: [0.1        0.14400401 0.43530673 0.56735097 0.67123772 0.85007428
 0.96027899 1.27499486 1.30722399 1.45964606 1.50847532 1.56295124
 1.69639625 1.9974138  2.19910609 2.3801121  2.40755277 2.52793766
 2.69283446 2.92932086 2.95289839 3.20930286 3.33976615 3.50636637
 3.60390107]
modified Eigenvalues are: [0.1        0.21421966 0.2689519  0.42469752 0.50981103 0.60454014
 0.69501827 1.07471026 1.12006673 1.19315204 1.28966759 1.28966759
 1.28966759 1.28966759 1.28966759 1.38618314 1.45926845 1.50462492
 1.88431691 1.97479504 2.06952415 2.15463766 2.31038328 2.36511552
 2.47933518]


In [17]:
# Generate the data set segment by segment: rows [old_break_pt, break_pt) of
# segment `counter` are sampled from the covariance of cluster seg_ids[counter].
if len(seg_ids) != len(break_points):
    raise ValueError(
        "cluster_ids and break_points must have the same length "
        f"(got {len(seg_ids)} and {len(break_points)})"
    )

# Reseed here so the configured rand_seed governs the sampling regardless of
# any random-number use in earlier cells, and so that re-running this cell
# reproduces the same data.
np.random.seed(rand_seed)

Data = np.zeros([break_points[-1], size_blocks])

for counter in range(len(break_points)):
    break_pt = break_points[counter]
    cluster = seg_ids[counter]
    old_break_pt = 0 if counter == 0 else break_points[counter - 1]

    segment_length = break_pt - old_break_pt
    if segment_length <= 0:
        # Empty segment: nothing to sample.
        continue

    # Only the leading size_blocks x size_blocks block of the stacked
    # covariance is used, so the rows of a segment are drawn independently
    # of one another.
    cov_matrix = cluster_covariances[cluster][0:size_blocks, 0:size_blocks]

    # One batched draw per segment: the covariance is factorised once
    # instead of once per row.
    Data[old_break_pt:break_pt, :] = np.random.multivariate_normal(
        cluster_mean.reshape(size_blocks), cov_matrix, size=segment_length
    )


In [18]:
# Write the generated samples to CSV: one row per sample, comma-separated,
# four decimal places.
np.savetxt(
    out_file_name,
    Data,
    fmt='%1.4f',
    delimiter=",",
)